cascading.stats.hadoop.CounterCache.java Source code

Java tutorial

Introduction

Here is the source code for cascading.stats.hadoop.CounterCache.java

Source

/*
 * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.stats.hadoop;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import cascading.flow.FlowException;
import cascading.stats.CascadingStats;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static cascading.util.Util.formatDurationFromMillis;
import static java.lang.System.currentTimeMillis;

/**
 *
 */
public abstract class CounterCache<JobStatus, Counters> {
    public static final String COUNTER_TIMEOUT_PROPERTY = "cascading.counter.timeout.seconds";
    public static final String COUNTER_FETCH_RETRIES_PROPERTY = "cascading.counter.fetch.retries";
    public static final String COUNTER_MAX_AGE_PROPERTY = "cascading.counter.age.max.seconds";
    public static final int DEFAULT_TIMEOUT_TIMEOUT_SEC = 0; // zero means making the call synchronously
    public static final int DEFAULT_FETCH_RETRIES = 3;
    public static final int DEFAULT_CACHED_AGE_MAX = 0; // rely on client interface caching

    private static final Logger LOG = LoggerFactory.getLogger(CounterCache.class);

    // hardcoded at one thread to force serialization across all requesters in the jvm
    // this likely prevents the deadlocks the futures are safeguards against
    private static ExecutorService futuresPool = Executors.newSingleThreadExecutor(new ThreadFactory() {
        @Override
        public Thread newThread(Runnable runnable) {
            Thread thread = new Thread(runnable, "stats-counter-future");

            thread.setDaemon(true);

            return thread;
        }
    });

    private CascadingStats stats;
    private boolean hasCapturedFinalCounters;
    private boolean hasAvailableCounters = true;
    private Counters cachedCounters = null;
    private long lastFetch = 0;
    private boolean warnedStale = false;

    protected int maxFetchAttempts;
    protected int fetchAttempts;
    protected int timeout;
    protected int maxAge;

    protected CounterCache(CascadingStats stats, Configuration configuration) {
        this.stats = stats;
        this.timeout = configuration.getInt(COUNTER_TIMEOUT_PROPERTY, DEFAULT_TIMEOUT_TIMEOUT_SEC);
        this.maxFetchAttempts = configuration.getInt(COUNTER_FETCH_RETRIES_PROPERTY, DEFAULT_FETCH_RETRIES);
        this.maxAge = configuration.getInt(COUNTER_MAX_AGE_PROPERTY, DEFAULT_CACHED_AGE_MAX);
    }

    protected abstract JobStatus getJobStatusClient();

    protected abstract boolean areCountersAvailable(JobStatus runningJob);

    protected abstract Counters getCounters(JobStatus runningJob) throws IOException;

    protected abstract Collection<String> getGroupNames(Counters counters);

    protected abstract Set<String> getCountersFor(Counters counters, String group);

    protected abstract long getCounterValue(Counters counters, Enum counter);

    protected abstract long getCounterValue(Counters counters, String group, String counter);

    public Collection<String> getCounterGroups() {
        Counters counters = cachedCounters();

        if (counters == null)
            return Collections.emptySet();

        return Collections.unmodifiableCollection(getGroupNames(counters));
    }

    public Collection<String> getCounterGroupsMatching(String regex) {
        Counters counters = cachedCounters();

        if (counters == null)
            return Collections.emptySet();

        Set<String> results = new HashSet<String>();

        for (String counter : getGroupNames(counters)) {
            if (counter.matches(regex))
                results.add(counter);
        }

        return Collections.unmodifiableCollection(results);
    }

    public Collection<String> getCountersFor(String group) {
        Counters counters = cachedCounters();

        if (counters == null)
            return Collections.emptySet();

        Set<String> results = getCountersFor(counters, group);

        return Collections.unmodifiableCollection(results);
    }

    public long getCounterValue(Enum counter) {
        Counters counters = cachedCounters();

        if (counters == null)
            return 0;

        return getCounterValue(counters, counter);
    }

    public long getCounterValue(String group, String counter) {
        Counters counters = cachedCounters();

        if (counters == null)
            return 0;

        return getCounterValue(counters, group, counter);
    }

    protected Counters cachedCounters() {
        return cachedCounters(false);
    }

    protected synchronized Counters cachedCounters(boolean force) {
        if (!hasAvailableCounters)
            return cachedCounters;

        // no point in trying again
        if (fetchAttempts >= maxFetchAttempts) {
            if (!hasCapturedFinalCounters && !warnedStale) {
                if (cachedCounters == null)
                    LOG.warn("no counters fetched, max num consecutive retries reached: {}, type: {}, status: {}",
                            maxFetchAttempts, stats.getType(), stats.getStatus());
                else
                    LOG.warn(
                            "stale counters being returned, max num consecutive retries reached, age: {}, type: {}, status: {}",
                            formatDurationFromMillis(currentTimeMillis() - lastFetch), stats.getType(),
                            stats.getStatus());

                warnedStale = true;
            }

            return cachedCounters;
        }

        boolean isProcessFinished = stats.isFinished();

        // ignore force, no reason to refresh completed stats
        if (isProcessFinished && hasCapturedFinalCounters)
            return cachedCounters;

        // have not capturedFinalCounters - force it
        if (!force && isProcessFinished)
            force = true;

        int currentAge = (int) ((lastFetch - currentTimeMillis()) / 1000);

        boolean isStale = currentAge >= maxAge;

        // if we have counters, aren't being forced to update, and values aren't considered stale, return them
        if (cachedCounters != null && !force && !isStale)
            return cachedCounters;

        JobStatus runningJob = getJobStatusClient();

        if (runningJob == null)
            return cachedCounters;

        if (!areCountersAvailable(runningJob)) {
            hasAvailableCounters = false;
            return cachedCounters;
        }

        boolean success = false;

        try {
            Counters fetched = fetchCounters(runningJob);

            success = fetched != null;

            if (success) {
                cachedCounters = fetched;
                lastFetch = currentTimeMillis();
                fetchAttempts = 0; // reset attempt counter, mitigates for transient non-consecutive failures
            }
        } catch (InterruptedException exception) {
            LOG.warn("fetching counters was interrupted");
        } catch (ExecutionException exception) {
            fetchAttempts++;

            if (fetchAttempts >= maxFetchAttempts)
                LOG.error("fetching counters failed, was final consecutive attempt: {}, type: {}, status: {}",
                        fetchAttempts, stats.getType(), stats.getStatus(), exception.getCause());
            else
                LOG.warn("fetching counters failed, consecutive attempts: {}, type: {}, status: {}, message: {}",
                        fetchAttempts, stats.getType(), stats.getStatus(), exception.getCause().getMessage());

            if (cachedCounters != null) {
                LOG.error("returning cached values");

                return cachedCounters;
            }

            LOG.error("unable to get remote counters, no cached values, rethrowing exception",
                    exception.getCause());

            if (exception.getCause() instanceof FlowException)
                throw (FlowException) exception.getCause();

            throw new FlowException(exception.getCause());
        } catch (TimeoutException exception) {
            fetchAttempts++;

            if (fetchAttempts >= maxFetchAttempts)
                LOG.warn(
                        "fetching counters timed out after: {} seconds, was final consecutive attempt: {}, type: {}, status: {}",
                        timeout, fetchAttempts, stats.getType(), stats.getStatus());
            else
                LOG.warn(
                        "fetching counters timed out after: {} seconds, consecutive attempts: {}, type: {}, status: {}",
                        timeout, fetchAttempts, stats.getType(), stats.getStatus());
        }

        hasCapturedFinalCounters = isProcessFinished && success;

        return cachedCounters;
    }

    private Counters fetchCounters(JobStatus runningJob)
            throws InterruptedException, ExecutionException, TimeoutException {
        // if timeout greater than zero, perform async call
        if (timeout > 0)
            return runFuture(runningJob).get(timeout, TimeUnit.SECONDS);

        try {
            return getCounters(runningJob);
        } catch (IOException exception) {
            throw new FlowException("unable to get remote counter values", exception);
        }
    }

    private Future<Counters> runFuture(final JobStatus jobStatus) {
        Callable<Counters> task = new Callable<Counters>() {
            @Override
            public Counters call() throws Exception {
                try {
                    return getCounters(jobStatus);
                } catch (IOException exception) {
                    throw new FlowException("unable to get remote counter values", exception);
                }
            }
        };

        return futuresPool.submit(task);
    }
}