io.wcm.caravan.pipeline.impl.operators.CachePointTransformer.java Source code

Java tutorial

Introduction

Here is the source code for io.wcm.caravan.pipeline.impl.operators.CachePointTransformer.java

Source

/*
 * #%L
 * wcm.io
 * %%
 * Copyright (C) 2014 wcm.io
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package io.wcm.caravan.pipeline.impl.operators;

import io.wcm.caravan.commons.metrics.rx.HitsAndMissesCountingMetricsOperator;
import io.wcm.caravan.commons.metrics.rx.TimerMetricsOperator;
import io.wcm.caravan.io.http.request.CaravanHttpRequest;
import io.wcm.caravan.pipeline.JsonPipelineInputException;
import io.wcm.caravan.pipeline.JsonPipelineOutput;
import io.wcm.caravan.pipeline.cache.CacheDateUtils;
import io.wcm.caravan.pipeline.cache.CachePersistencyOptions;
import io.wcm.caravan.pipeline.cache.CacheStrategy;
import io.wcm.caravan.pipeline.cache.spi.CacheAdapter;
import io.wcm.caravan.pipeline.impl.JacksonFunctions;
import io.wcm.caravan.pipeline.impl.JsonPipelineContextImpl;
import io.wcm.caravan.pipeline.impl.JsonPipelineOutputImpl;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import rx.Observable;
import rx.Observable.Transformer;
import rx.Observer;
import rx.Subscriber;
import rx.exceptions.Exceptions;

import com.codahale.metrics.Counter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

/**
 * A rather complicated transformer that implements the pipeline's caching capabilities.
 */
public class CachePointTransformer implements Transformer<JsonPipelineOutput, JsonPipelineOutput> {

    private static final Logger log = LoggerFactory.getLogger(CachePointTransformer.class);

    private JsonPipelineContextImpl context;
    private final List<CaravanHttpRequest> requests;
    private final String descriptor;
    private final CacheStrategy strategy;
    private final String correlationId;

    /**
     * Creates a transformer that adds a cache point to a pipeline. The correlation ids of all given requests are
     * joined (comma-separated) into one string, which is appended to the log messages written by this transformer.
     * @param context a context of the actual JSON pipeline
     * @param requests the outgoing REST request(s) used to obtain the JSON data to be cached
     * @param descriptor the unique id of the pipeline (to build a cache key)
     * @param strategy the CacheStrategy to get storage time and refresh interval
     */
    public CachePointTransformer(JsonPipelineContextImpl context, List<CaravanHttpRequest> requests,
            String descriptor, CacheStrategy strategy) {
        this.context = context;
        this.requests = requests;
        this.descriptor = descriptor;
        this.strategy = strategy;

        StringBuilder joinedIds = new StringBuilder();
        for (CaravanHttpRequest httpRequest : requests) {
            // a separator is only written once the buffer already contains something
            if (joinedIds.length() > 0) {
                joinedIds.append(",");
            }
            joinedIds.append(httpRequest.getCorrelationId());
        }
        this.correlationId = joinedIds.toString();
    }

    /**
     * Collects the service ids of the given requests.
     * @param requests the requests to read the service ids from
     * @return the distinct service ids, sorted in their natural order
     */
    private static SortedSet<String> getSourceServiceIds(List<CaravanHttpRequest> requests) {
        SortedSet<String> ids = new TreeSet<>();
        requests.forEach(httpRequest -> ids.add(httpRequest.getServiceId()));
        return ids;
    }

    /**
     * @return the sorted, distinct service ids of this transformer's requests, joined with '+'; used as prefix
     *         for the cache key and as part of the metric names
     */
    private String getSourceServicePrefix() {
        SortedSet<String> serviceIds = getSourceServiceIds(requests);
        return StringUtils.join(serviceIds, '+');
    }

    /**
     * Wraps the given pipeline output in an observable that first asks the cache adapter for a stored response.
     * What happens with the cache result (serve it, or subscribe to the original output and store its response)
     * is decided by a {@link CacheResponseObserver}.
     * @param output the uncached output of the pipeline
     * @return an observable that performs the cache lookup when it is subscribed to
     */
    @Override
    public Observable<JsonPipelineOutput> call(Observable<JsonPipelineOutput> output) {

        // nothing inside this lambda is executed before somebody subscribes to the returned observable
        return Observable.create(downstream -> {

            // the cache key is built from the involved service ids and the pipeline's descriptor
            String servicePrefix = getSourceServicePrefix();
            CacheAdapter adapter = context.getCacheAdapter();
            final String key = servicePrefix + ":" + descriptor;

            // the persistency options are determined by the caching strategy for the given requests
            CachePersistencyOptions persistencyOptions = strategy.getCachePersistencyOptions(requests);

            // ask the cache adapter for the stored envelope
            Observable<String> envelopeFromCache = adapter.get(key, persistencyOptions);

            // metrics are registered per source service prefix
            MetricRegistry registry = context.getMetricRegistry();
            Timer getLatency = registry.timer(MetricRegistry.name(getClass(), servicePrefix, "latency", "get"));
            Counter hits = registry.counter(MetricRegistry.name(getClass(), servicePrefix, "hits"));
            Counter misses = registry.counter(MetricRegistry.name(getClass(), servicePrefix, "misses"));

            TimerMetricsOperator<String> timing = new TimerMetricsOperator<String>(getLatency);
            HitsAndMissesCountingMetricsOperator<String> counting =
                    new HitsAndMissesCountingMetricsOperator<String>(hits, misses);

            // the CacheResponseObserver reacts to the cache result (emission, empty completion or error)
            envelopeFromCache
                    .lift(timing)
                    .lift(counting)
                    .subscribe(new CacheResponseObserver(key, output, downstream));
        });
    }

    /**
     * An observer that is subscribed to the {@link Observable} returned by
     * {@link CacheAdapter#get(String, CachePersistencyOptions)} , and is responsible for
     * <ul>
     * <li>unwrapping the JSON content from the caching envelope if it was successfully retrieved from cache</li>
     * <li>forwarding the unwrapped response to the subscriber given in the constructor</li>
     * <li>fetch the response from the Pipeline's dataSource if it couldn't be retrieved from cache</li>
     * <li>store the fetched responses to couchbase (wrapped in an envelope with metadata</li> *
     * </ul>
     */
    public final class CacheResponseObserver implements Observer<String> {

        private final String cacheKey;
        private final Observable<JsonPipelineOutput> originalSource;
        private final Subscriber<? super JsonPipelineOutput> subscriber;

        private boolean cacheHit;

        private CacheResponseObserver(String cacheKey, Observable<JsonPipelineOutput> originalSource,
                Subscriber<? super JsonPipelineOutput> subscriberToForwardTo) {
            this.cacheKey = cacheKey;
            this.originalSource = originalSource;
            this.subscriber = subscriberToForwardTo;
        }

        @Override
        public void onNext(String cachedContent) {

            CacheEnvelope cacheEntry = CacheEnvelope.fromEnvelopeString(cachedContent, cacheKey);
            if (cacheEntry == null) {

                log.warn("CACHE ERROR for {} - the cached response could not be parsed,\n{}", this.cacheKey,
                        correlationId);
                // ignore cache envelopes that can not be parsed
                return;
            }
            cacheHit = true;

            int responseAge = cacheEntry.getResponseAge();
            int refreshInterval = strategy.getCachePersistencyOptions(requests).getRefreshInterval();

            int expirySeconds = cacheEntry.getExpirySeconds();

            int maxAgeFromClient = getClientMaxAge();

            // check if the content from cache is fresh enough to serve it
            if (responseAge < refreshInterval && responseAge < maxAgeFromClient && expirySeconds > 0) {
                log.debug("CACHE HIT for {},\n{}", this.cacheKey, correlationId);

                // the document could be retrieved, so forward it (parsed as a JsonNode) to the actual subscriber to the cachedSource
                serveCachedContent(cacheEntry, refreshInterval);
            } else {
                // this means the cached content is outdated - we better fetch the data from the backend
                String reason;
                if (responseAge >= refreshInterval) {
                    reason = "it's " + responseAge
                            + " seconds old and the cache strategy has a refresh interval of " + refreshInterval
                            + " seconds.";
                } else if (responseAge >= maxAgeFromClient) {
                    reason = "it's " + responseAge + " seconds old and the client requested a max-age of "
                            + maxAgeFromClient + " seconds.";
                } else {
                    reason = "it has expired " + (-expirySeconds)
                            + " seconds ago, according to the original max-age header from the http-response";
                }

                log.debug("CACHE STALE - content for {} is available, but {},\n{}", cacheKey, reason,
                        correlationId);

                fetchAndStore(new Subscriber<JsonPipelineOutput>() {

                    @Override
                    public void onNext(JsonPipelineOutput fetchedOutput) {
                        subscriber.onNext(fetchedOutput);
                    }

                    @Override
                    public void onCompleted() {
                        subscriber.onCompleted();
                    }

                    @Override
                    public void onError(Throwable e) {
                        Exceptions.throwIfFatal(e);

                        // if the cached response was a an error as well (e.g. 404), then do not use it as a fallback.
                        // instead  just forward the actual exception that occurred
                        if (cacheEntry.getStatusCode() >= 400) {
                            subscriber.onError(e);
                            return;
                        }

                        if (e instanceof JsonPipelineInputException
                                && ((JsonPipelineInputException) e).getStatusCode() == 404) {
                            log.warn(
                                    "CACHE FALLBACK - Using stale content from cache as a fallback after failing to fresh content for "
                                            + cacheKey + ",\n" + correlationId + "\n" + e.getMessage());
                        } else {
                            log.warn(
                                    "CACHE FALLBACK - Using stale content from cache as a fallback after failing to fresh content for "
                                            + cacheKey + ",\n" + correlationId,
                                    e);
                        }

                        JsonPipelineOutputImpl pipelineOutput = new JsonPipelineOutputImpl(
                                cacheEntry.getContentNode(), requests);

                        // when fallback content is served from cache, it should not be cached by the client at all
                        subscriber.onNext(pipelineOutput.withMaxAge(0));
                        subscriber.onCompleted();
                    }
                });
            }
        }

        private int getClientMaxAge() {
            int maxAgeFromClient = (int) TimeUnit.DAYS.toSeconds(365);
            for (String cacheControl : requests.get(0).getHeaders().get("Cache-Control")) {
                if (cacheControl.startsWith("max-age")) {
                    int maxAge = NumberUtils.toInt(StringUtils.substringAfter(cacheControl, "="), maxAgeFromClient);
                    if (maxAge > 0) {
                        maxAgeFromClient = maxAge;
                    }
                }
            }
            return maxAgeFromClient;
        }

        private void serveCachedContent(CacheEnvelope cacheEntry, int refreshInterval) {

            if (cacheEntry.getStatusCode() == HttpStatus.SC_NOT_FOUND) {
                // the cache entry is a 404 response that should be thrown as an exception to be handled by the subscriber
                String cachedInfoSuffix = " (Cached from " + cacheEntry.getSources() + " at "
                        + cacheEntry.getGeneratedDate() + ")";
                subscriber.onError(new JsonPipelineInputException(HttpStatus.SC_NOT_FOUND,
                        cacheEntry.getReasonString() + cachedInfoSuffix));
            } else {
                // make sure to set the max-age content-header just to the time the cached content will become stale
                int maxAge = refreshInterval - cacheEntry.getResponseAge();
                maxAge = Math.min(maxAge, cacheEntry.getExpirySeconds());

                subscriber.onNext(
                        new JsonPipelineOutputImpl(cacheEntry.getContentNode(), requests).withMaxAge(maxAge));
                subscriber.onCompleted();
            }
        }

        @Override
        public void onCompleted() {
            if (!cacheHit) {
                // there was no emission, so the response has to be fetched from the service
                log.debug("CACHE MISS for {} fetching response from {} through pipeline,\n{}", cacheKey,
                        getSourceServicePrefix(), correlationId);
                fetchAndStore(subscriber);
            }
        }

        @Override
        public void onError(Throwable e) {
            Exceptions.throwIfFatal(e);

            // also fall back to the actual service if the couchbase request failed
            log.warn("Failed to connect to couchbase server, falling back to direct connection to "
                    + getSourceServicePrefix() + ",\n" + correlationId, e);
            fetchAndStore(subscriber);
        }

        private void fetchAndStore(Subscriber<? super JsonPipelineOutput> backendResponseSubscriber) {

            // fetch the output with a new subscription, which will also store the response in the cache when it is retrieved
            originalSource.subscribe(new Observer<JsonPipelineOutput>() {

                @Override
                public void onNext(JsonPipelineOutput fetchedModel) {
                    CachePersistencyOptions options = strategy.getCachePersistencyOptions(requests);

                    int contentMaxAge = options.getRefreshInterval();
                    if (fetchedModel.getMaxAge() >= 0) {
                        contentMaxAge = Math.min(contentMaxAge, fetchedModel.getMaxAge());
                    }

                    log.debug(
                            "CACHE PUT - response for {} has been fetched and will be put in the cache, max-age={} sec,\n{}",
                            cacheKey, contentMaxAge, correlationId);

                    CacheEnvelope cacheEntry = CacheEnvelope.from200Response(fetchedModel.getPayload(),
                            contentMaxAge, requests, cacheKey, descriptor, context.getProperties());
                    context.getCacheAdapter().put(cacheKey, cacheEntry.getEnvelopeString(), options);

                    // everything else is just forwarding to the subscriber to the cachedSource
                    backendResponseSubscriber.onNext(fetchedModel.withMaxAge(contentMaxAge));
                }

                @Override
                public void onCompleted() {
                    backendResponseSubscriber.onCompleted();
                }

                @Override
                public void onError(Throwable e) {
                    Exceptions.throwIfFatal(e);

                    if (e instanceof JsonPipelineInputException) {
                        if (((JsonPipelineInputException) e).getStatusCode() == HttpStatus.SC_NOT_FOUND) {

                            int maxAgeFor404 = 60;
                            CachePersistencyOptions options = CachePersistencyOptions.createTransient(maxAgeFor404);
                            log.debug(
                                    "CACHE PUT - 404 response for {} will be stored in the cache, max-age={} sec,\n{}",
                                    descriptor, options.getRefreshInterval(), correlationId);

                            CacheEnvelope cacheEntry = CacheEnvelope.from404Response(e.getMessage(), maxAgeFor404,
                                    requests, cacheKey, descriptor, context.getProperties());
                            context.getCacheAdapter().put(cacheKey, cacheEntry.getEnvelopeString(), options);
                        }
                    }
                    backendResponseSubscriber.onError(e);
                }
            });
        }
    }

    /**
     * Implements generation and parsing of the cache "envelope" document, that wraps the JSON output of the pipeline to
     * be able to store additional metadata in the cache
     */
    public static final class CacheEnvelope {

        private static final String CACHE_METADATA_PROPERTY = "metadata";
        private static final String CACHE_CONTENT_PROPERTY = "content";

        private final ObjectNode envelopeNode;
        private final ObjectNode metadataNode;
        private final JsonNode contentNode;

        /**
         * @param envelopeNode the envelope; its "metadata" property must be a JSON object
         */
        private CacheEnvelope(ObjectNode envelopeNode) {
            this.envelopeNode = envelopeNode;
            metadataNode = (ObjectNode) envelopeNode.get(CACHE_METADATA_PROPERTY);
            contentNode = envelopeNode.get(CACHE_CONTENT_PROPERTY);
        }

        /**
         * Parse a JSON string that was obtained from the couchbase cache
         * @param jsonString the envelope document as JSON string
         * @param cacheKey the key of the cached document (only used for log messages)
         * @return the CacheEntry - or null if the json String was not in the expected format
         */
        public static CacheEnvelope fromEnvelopeString(String jsonString, String cacheKey) {
            try {
                ObjectNode envelopeFromCache = JacksonFunctions.stringToObjectNode(jsonString);

                // the metadata must be a JSON object (not just present), otherwise the constructor's cast would fail
                JsonNode metadataFromCache = envelopeFromCache.get(CACHE_METADATA_PROPERTY);
                boolean hasMetadataObject = metadataFromCache != null && metadataFromCache.isObject();

                if (!hasMetadataObject || !envelopeFromCache.has(CACHE_CONTENT_PROPERTY)) {
                    log.warn(
                            "Ignoring cached document {}, because it doesn't have the expected metadata/content envelope.",
                            cacheKey);
                    return null;
                }

                return new CacheEnvelope(envelopeFromCache);
            } catch (JsonPipelineInputException e) {
                log.warn("Failed parse cached JSON document from " + cacheKey, e);
                return null;
            }
        }

        /**
         * Create a new CacheEnvelope (with status code 200) to store in the couchbase cache
         * @param contentNode the JSON content to wrap
         * @param maxAge how many seconds the content should stay valid; no expiry date is stored if it's not positive
         * @param requests the requests that were used to obtain the content
         * @param cacheKey the key under which the envelope is stored
         * @param pipelineDescriptor the descriptor of the pipeline that created the content
         * @param contextProperties the properties of the pipeline context
         * @return the new CacheEnvelope instance
         */
        public static CacheEnvelope from200Response(JsonNode contentNode, int maxAge,
                List<CaravanHttpRequest> requests, String cacheKey, String pipelineDescriptor,
                Map<String, String> contextProperties) {

            ObjectNode envelope = createEnvelopeNode(contentNode, HttpStatus.SC_OK, maxAge, requests, cacheKey,
                    pipelineDescriptor, null, contextProperties);
            return new CacheEnvelope(envelope);
        }

        /**
         * Create a new CacheEnvelope (with status code 404 and empty content) to store in the couchbase cache
         * @param reason the reason to store in the metadata (omitted if blank)
         * @param maxAge how many seconds 404 responses in cache should stay valid
         * @param requests the requests that lead to the 404 response
         * @param cacheKey the key under which the envelope is stored
         * @param pipelineDescriptor the descriptor of the pipeline that failed
         * @param contextProperties the properties of the pipeline context
         * @return the new CacheEnvelope instance
         */
        public static CacheEnvelope from404Response(String reason, int maxAge, List<CaravanHttpRequest> requests,
                String cacheKey, String pipelineDescriptor, Map<String, String> contextProperties) {

            JsonNode contentNode = JacksonFunctions.emptyObject();
            int statusCode = HttpStatus.SC_NOT_FOUND;

            ObjectNode envelope = createEnvelopeNode(contentNode, statusCode, maxAge, requests, cacheKey,
                    pipelineDescriptor, reason, contextProperties);
            return new CacheEnvelope(envelope);
        }

        /**
         * Creates an envelope with placeholder metadata (no requests, fixed cache key and descriptor) around the
         * given content, and sets its "generated" date relative to the current time.
         * @param contentJson the content as JSON string
         * @param age the age of the envelope; passed negated to {@link CacheDateUtils#formatRelativeTime}
         * @return the new CacheEnvelope instance
         */
        static CacheEnvelope fromContentString(String contentJson, int age) {
            ObjectNode envelopeNode = createEnvelopeNode(JacksonFunctions.stringToObjectNode(contentJson), 200, 0,
                    ImmutableList.of(), "Cache-Key", "Descriptor", null, ImmutableMap.of());

            CacheEnvelope envelope = new CacheEnvelope(envelopeNode);

            envelope.getMetadataNode().put("generated", CacheDateUtils.formatRelativeTime(-age));

            return envelope;
        }

        /**
         * Builds the envelope document: a "metadata" object followed by the "content" property.
         * @param contentNode the content to wrap
         * @param statusCode the status code to store in the metadata
         * @param maxAge seconds until expiry; the "expires" property is only written for positive values
         * @param requests the requests to take the source service ids and source paths from
         * @param cacheKey the cache key to store in the metadata
         * @param pipelineDescriptor the pipeline descriptor to store in the metadata
         * @param reason an optional reason; only written if it is not blank
         * @param contextProperties the context properties to store in the metadata
         * @return the envelope node
         */
        private static ObjectNode createEnvelopeNode(JsonNode contentNode, int statusCode, int maxAge,
                List<CaravanHttpRequest> requests, String cacheKey, String pipelineDescriptor, String reason,
                Map<String, String> contextProperties) {

            ObjectNode envelope = JacksonFunctions.emptyObject();
            ObjectNode metadata = envelope.putObject(CACHE_METADATA_PROPERTY);

            metadata.put("cacheKey", cacheKey);
            metadata.set("sources", JacksonFunctions.pojoToNode(getSourceServiceIds(requests)));
            metadata.put("pipeline", pipelineDescriptor);
            metadata.put("generated", CacheDateUtils.formatCurrentTime());
            if (maxAge > 0) {
                metadata.put("expires", CacheDateUtils.formatRelativeTime(maxAge));
            }
            metadata.put("statusCode", statusCode);

            // only the part of the URL before the query string is stored as source path
            List<String> sourcePaths = new ArrayList<String>();
            for (CaravanHttpRequest req : requests) {
                sourcePaths.add(StringUtils.substringBefore(req.getUrl(), "?"));
            }
            metadata.set("sourcePaths", JacksonFunctions.pojoToNode(sourcePaths));

            if (StringUtils.isNotBlank(reason)) {
                metadata.put("reason", reason);
            }
            metadata.set("contextProperties", JacksonFunctions.pojoToNode(contextProperties));
            envelope.set(CACHE_CONTENT_PROPERTY, contentNode);

            return envelope;
        }

        /**
         * @return the full envelope (as JSON string) to be stored in the cache
         */
        public String getEnvelopeString() {
            return JacksonFunctions.nodeToString(envelopeNode);
        }

        /**
         * @return the value of the envelope's "content" property
         */
        JsonNode getContentNode() {
            return contentNode;
        }

        /**
         * @return the envelope's "metadata" object
         */
        ObjectNode getMetadataNode() {
            return metadataNode;
        }

        /**
         * @return the status code from the metadata, or 200 if it can't be read as a number
         */
        int getStatusCode() {
            return metadataNode.at("/statusCode").asInt(HttpStatus.SC_OK);
        }

        /**
         * @return the reason from the metadata, or "Not Found" if there is none
         */
        String getReasonString() {
            return metadataNode.at("/reason").asText("Not Found");
        }

        /**
         * @return the JSON representation of the "sources" metadata property
         */
        String getSources() {
            return metadataNode.at("/sources").toString();
        }

        /**
         * @return the number of seconds since the "generated" date, as computed by {@link CacheDateUtils}
         */
        int getResponseAge() {
            return CacheDateUtils.getSecondsSince(getGeneratedDate());
        }

        /**
         * @return the "generated" date string from the metadata
         */
        String getGeneratedDate() {
            return metadataNode.at("/generated").asText();
        }

        /**
         * @return the number of seconds until the "expires" date, as computed by {@link CacheDateUtils}; or 365
         *         days (in seconds) if the metadata doesn't contain an expiry date
         */
        int getExpirySeconds() {
            if (!metadataNode.has("expires")) {
                return (int) TimeUnit.DAYS.toSeconds(365);
            }
            String expiryDate = metadataNode.at("/expires").asText();
            return CacheDateUtils.getSecondsUntil(expiryDate);
        }

        /**
         * Overwrites the "generated" date in the metadata.
         * @param newDate the new date string
         */
        public void setGeneratedDate(String newDate) {
            metadataNode.put("generated", newDate);
        }

        /**
         * Overwrites the "expires" date in the metadata.
         * @param newDate the new date string
         */
        public void setExpiresDate(String newDate) {
            metadataNode.put("expires", newDate);
        }
    }
}