org.apache.giraph.ooc.OutOfCoreIOStatistics.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.giraph.ooc.OutOfCoreIOStatistics.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.giraph.ooc;

import com.google.common.collect.Maps;
import org.apache.giraph.conf.GiraphConstants;
import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
import org.apache.giraph.conf.IntConfOption;
import org.apache.giraph.ooc.command.IOCommand.IOCommandType;
import org.apache.log4j.Logger;

import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Class to collect statistics regarding IO operations done in out-of-core
 * mechanism.
 */
public class OutOfCoreIOStatistics {
    /**
     * An estimate of disk bandwidth. This number is only used just at the start
     * of the computation, and will be updated/replaced later on once a few disk
     * operations happen.
     */
    public static final IntConfOption DISK_BANDWIDTH_ESTIMATE = new IntConfOption("giraph.diskBandwidthEstimate",
            125,
            "An estimate of disk bandwidth (MB/s). This number is used just at "
                    + "the beginning of the computation, and it will be "
                    + "updated/replaced once a few disk operations happen.");
    /**
     * How many recent IO operations should we keep track of? Any report given out
     * of this statistics collector is only based on most recent IO operations.
     */
    public static final IntConfOption IO_COMMAND_HISTORY_SIZE = new IntConfOption("giraph.ioCommandHistorySize", 50,
            "Number of most recent IO operations to consider for reporting the" + "statistics.");

    /** Class logger */
    private static final Logger LOG = Logger.getLogger(OutOfCoreIOStatistics.class);
    /** Estimate of disk bandwidth (bytes/second) */
    private final AtomicLong diskBandwidthEstimate;
    /** Cached value for IO_COMMAND_HISTORY_SIZE */
    private final int maxHistorySize;
    /**
     * Coefficient/Weight of the most recent IO operation toward the disk
     * bandwidth estimate. Basically if the disk bandwidth estimate if d, and the
     * latest IO command happened at the rate of r, the new estimate of disk
     * bandwidth is calculated as:
     * d_new = updateCoefficient * r + (1 - updateCoefficient) * d
     */
    private final double updateCoefficient;
    /** Queue of all recent commands */
    private final Queue<StatisticsEntry> commandHistory;
    /**
     * Command statistics for each type of IO command. This is the statistics of
     * the recent commands in the history we keep track of (with 'maxHistorySize'
     * command in the history).
     */
    private final Map<IOCommandType, StatisticsEntry> aggregateStats;
    /** How many IO command completed? */
    private int numUpdates = 0;

    /**
     * Constructor
     *
     * @param conf configuration
     * @param numIOThreads number of disks/IO threads
     */
    public OutOfCoreIOStatistics(ImmutableClassesGiraphConfiguration conf, int numIOThreads) {
        this.diskBandwidthEstimate = new AtomicLong(
                DISK_BANDWIDTH_ESTIMATE.get(conf) * (long) GiraphConstants.ONE_MB);
        this.maxHistorySize = IO_COMMAND_HISTORY_SIZE.get(conf);
        this.updateCoefficient = 1.0 / maxHistorySize;
        // Adding more entry to the capacity of the queue to have a wiggle room
        // if all IO threads are adding/removing entries from the queue
        this.commandHistory = new ArrayBlockingQueue<>(maxHistorySize + numIOThreads);
        this.aggregateStats = Maps.newConcurrentMap();
        for (IOCommandType type : IOCommandType.values()) {
            aggregateStats.put(type, new StatisticsEntry(type, 0, 0, 0));
        }
    }

    /**
     * Update statistics with the last IO command that is executed.
     *
     * @param type type of the IO command that is executed
     * @param bytesTransferred number of bytes transferred in the last IO command
     * @param duration duration it took for the last IO command to complete
     *                 (milliseconds)
     */
    public void update(IOCommandType type, long bytesTransferred, long duration) {
        StatisticsEntry entry = aggregateStats.get(type);
        synchronized (entry) {
            entry.setOccurrence(entry.getOccurrence() + 1);
            entry.setDuration(duration + entry.getDuration());
            entry.setBytesTransferred(bytesTransferred + entry.getBytesTransferred());
        }
        commandHistory.offer(new StatisticsEntry(type, bytesTransferred, duration, 0));
        if (type != IOCommandType.WAIT) {
            // If the current estimate is 'd', the new rate is 'r', and the size of
            // history is 'n', we can simply model all the past command's rate as:
            // d, d, d, ..., d, r
            // where 'd' happens for 'n-1' times. Hence the new estimate of the
            // bandwidth would be:
            // d_new = (d * (n-1) + r) / n = (1-1/n)*d + 1/n*r
            // where updateCoefficient = 1/n
            diskBandwidthEstimate.set((long) (updateCoefficient * (bytesTransferred / duration * 1000)
                    + (1 - updateCoefficient) * diskBandwidthEstimate.get()));
        }
        if (commandHistory.size() > maxHistorySize) {
            StatisticsEntry removedEntry = commandHistory.poll();
            entry = aggregateStats.get(removedEntry.getType());
            synchronized (entry) {
                entry.setOccurrence(entry.getOccurrence() - 1);
                entry.setDuration(entry.getDuration() - removedEntry.getDuration());
                entry.setBytesTransferred(entry.getBytesTransferred() - removedEntry.getBytesTransferred());
            }
        }
        numUpdates++;
        // Outputting log every so many commands
        if (numUpdates % 10 == 0) {
            if (LOG.isInfoEnabled()) {
                LOG.info(this);
            }
        }
    }

    @Override
    public String toString() {
        StringBuffer sb = new StringBuffer();
        long waitTime = 0;
        long loadTime = 0;
        long bytesRead = 0;
        long storeTime = 0;
        long bytesWritten = 0;
        for (Map.Entry<IOCommandType, StatisticsEntry> entry : aggregateStats.entrySet()) {
            synchronized (entry.getValue()) {
                sb.append(entry.getKey() + ": " + entry.getValue() + ", ");
                if (entry.getKey() == IOCommandType.WAIT) {
                    waitTime += entry.getValue().getDuration();
                } else if (entry.getKey() == IOCommandType.LOAD_PARTITION) {
                    loadTime += entry.getValue().getDuration();
                    bytesRead += entry.getValue().getBytesTransferred();
                } else {
                    storeTime += entry.getValue().getDuration();
                    bytesWritten += entry.getValue().getBytesTransferred();
                }
            }
        }
        sb.append(String.format("Average STORE: %.2f MB/s, ",
                (double) bytesWritten / storeTime * 1000 / 1024 / 1024));
        sb.append(String.format("DATA_INJECTION: %.2f MB/s, ",
                (double) (bytesRead - bytesWritten) / (waitTime + loadTime + storeTime) * 1000 / 1024 / 1024));
        sb.append(String.format("DISK_BANDWIDTH: %.2f MB/s", (double) diskBandwidthEstimate.get() / 1024 / 1024));

        return sb.toString();
    }

    /**
     * @return most recent estimate of the disk bandwidth
     */
    public long getDiskBandwidth() {
        return diskBandwidthEstimate.get();
    }

    /**
     * Get aggregate statistics of a given command type in the command history
     *
     * @param type type of the command to get the statistics for
     * @return aggregate statistics for the given command type
     */
    public BytesDuration getCommandTypeStats(IOCommandType type) {
        StatisticsEntry entry = aggregateStats.get(type);
        synchronized (entry) {
            return new BytesDuration(entry.getBytesTransferred(), entry.getDuration(), entry.getOccurrence());
        }
    }

    /**
     * Helper class to return results of statistics collector for a certain
     * command type
     */
    public static class BytesDuration {
        /** Number of bytes transferred in a few commands of the same type */
        private long bytes;
        /** Duration of it took to execute a few commands of the same type */
        private long duration;
        /** Number of commands executed of the same type */
        private int occurrence;

        /**
         * Constructor
         * @param bytes number of bytes transferred
         * @param duration duration it took to execute commands
         * @param occurrence number of commands
         */
        BytesDuration(long bytes, long duration, int occurrence) {
            this.bytes = bytes;
            this.duration = duration;
            this.occurrence = occurrence;
        }

        /**
         * @return number of bytes transferred for the same command type
         */
        public long getBytes() {
            return bytes;
        }

        /**
         * @return duration it took to execute a few commands of the same type
         */
        public long getDuration() {
            return duration;
        }

        /**
         * @return number of commands that are executed of the same type
         */
        public int getOccurrence() {
            return occurrence;
        }
    }

    /**
     * Helper class to keep statistics for a certain command type
     */
    private static class StatisticsEntry {
        /** Type of the command */
        private IOCommandType type;
        /**
         * Aggregate number of bytes transferred executing the particular command
         * type in the history we keep
         */
        private long bytesTransferred;
        /**
         * Aggregate duration it took executing the particular command type in the
         * history we keep
         */
        private long duration;
        /**
         * Number of occurrences of the particular command type in the history we
         * keep
         */
        private int occurrence;

        /**
         * Constructor
         *
         * @param type type of the command
         * @param bytesTransferred aggregate number of bytes transferred
         * @param duration aggregate execution time
         * @param occurrence number of occurrences of the particular command type
         */
        public StatisticsEntry(IOCommandType type, long bytesTransferred, long duration, int occurrence) {
            this.type = type;
            this.bytesTransferred = bytesTransferred;
            this.duration = duration;
            this.occurrence = occurrence;
        }

        /**
         * @return type of the command
         */
        public IOCommandType getType() {
            return type;
        }

        /**
         * @return aggregate number of bytes transferred in the particular command
         *         type
         */
        public long getBytesTransferred() {
            return bytesTransferred;
        }

        /**
         * Update the aggregate number of bytes transferred
         *
         * @param bytesTransferred aggregate number of bytes
         */
        public void setBytesTransferred(long bytesTransferred) {
            this.bytesTransferred = bytesTransferred;
        }

        /**
         * @return aggregate duration it took to execute the particular command type
         */
        public long getDuration() {
            return duration;
        }

        /**
         * Update the aggregate duration
         *
         * @param duration aggregate duration
         */
        public void setDuration(long duration) {
            this.duration = duration;
        }

        /**
         * @return number of occurrences of the particular command type
         */
        public int getOccurrence() {
            return occurrence;
        }

        /**
         * Update the number of occurrences of the particular command type
         *
         * @param occurrence number of occurrences
         */
        public void setOccurrence(int occurrence) {
            this.occurrence = occurrence;
        }

        @Override
        public String toString() {
            if (type == IOCommandType.WAIT) {
                return String.format("%.2f sec", duration / 1000.0);
            } else {
                return String.format("%.2f MB/s", (double) bytesTransferred / duration * 1000 / 1024 / 2014);
            }
        }
    }
}