de.zib.sfs.WrappedFSDataInputStream.java Source code

Java tutorial

Introduction

Here is the source code for de.zib.sfs.WrappedFSDataInputStream.java

Source

/*
 * Copyright (c) 2016 by Robert Schmidtke,
 *               Zuse Institute Berlin
 *
 * Licensed under the BSD License, see LICENSE file for details.
 *
 */
package de.zib.sfs;

import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.LambdaMetafactory;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.reflect.Method;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;

import de.zib.sfs.instrument.statistics.LiveOperationStatisticsAggregator;
import de.zib.sfs.instrument.statistics.OperationCategory;
import de.zib.sfs.instrument.statistics.OperationSource;

/**
 * Wraps an {@link FSDataInputStream} and reports every read and seek that
 * passes through it to a {@link LiveOperationStatisticsAggregator}.
 * <p>
 * For each read, the stream additionally tries to find out which datanode the
 * data came from, and flags the read as local if that datanode's host name
 * equals the value of the system property {@code de.zib.sfs.hostname} or is
 * {@code "localhost"}.
 */
public class WrappedFSDataInputStream extends InputStream implements PositionedReadable, Seekable {

    /** Logger of this class. */
    public static final Log LOG = LogFactory.getLog(WrappedFSDataInputStream.class);

    /**
     * Maps host names as reported by the datanode host name supplier to the
     * host names obtained from {@link InetAddress}. The map is static, i.e.
     * shared by all streams, hence it must tolerate concurrent access.
     */
    private static final Map<String, String> HOSTNAME_CACHE = new ConcurrentHashMap<>();

    private final FSDataInputStream in;
    private final int fd;

    // value of the system property de.zib.sfs.hostname, may be null
    private final String hostname;

    // created lazily on the first read, see getDatanodeHostNameString()
    private Supplier<String> datanodeHostnameSupplier;

    private final boolean skipOther;

    private final LiveOperationStatisticsAggregator aggregator;

    /**
     * Creates a new wrapper and registers a file descriptor for {@code f}
     * with the aggregator.
     *
     * @param in
     *            the stream to delegate all operations to
     * @param f
     *            the path the stream reads from, its string representation
     *            is registered with the aggregator
     * @param aggregator
     *            receives the statistics of all operations on this stream
     * @param skipOther
     *            if {@code true}, seek operations are not reported
     */
    public WrappedFSDataInputStream(FSDataInputStream in, Path f, LiveOperationStatisticsAggregator aggregator,
            boolean skipOther) {
        this.in = in;
        this.aggregator = aggregator;
        this.fd = this.aggregator.registerFileDescriptor(f.toString());
        this.hostname = System.getProperty("de.zib.sfs.hostname");
        this.skipOther = skipOther;
    }

    @Override
    public int read() throws IOException {
        long startTime = System.nanoTime();
        int result = this.in.read();
        long endTime = System.nanoTime();
        recordRead(startTime, endTime, result == -1 ? 0 : 1);
        return result;
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        long startTime = System.nanoTime();
        int result = this.in.read(b, off, len);
        long endTime = System.nanoTime();
        recordRead(startTime, endTime, result == -1 ? 0 : result);
        return result;
    }

    @Override
    public int read(byte[] b) throws IOException {
        long startTime = System.nanoTime();
        int result = this.in.read(b);
        long endTime = System.nanoTime();
        recordRead(startTime, endTime, result == -1 ? 0 : result);
        return result;
    }

    /**
     * Closes the wrapped stream. Without this override,
     * {@link InputStream#close()} would be used, which does nothing and
     * would leave the wrapped stream open.
     */
    @Override
    public void close() throws IOException {
        this.in.close();
    }

    @Override
    public long getPos() throws IOException {
        return this.in.getPos();
    }

    @Override
    public void seek(long desired) throws IOException {
        long startTime = System.nanoTime();
        this.in.seek(desired);
        recordOther(startTime);
    }

    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
        long startTime = System.nanoTime();
        boolean result = this.in.seekToNewSource(targetPos);
        recordOther(startTime);
        return result;
    }

    @Override
    public int read(long position, byte[] buffer, int offset, int length) throws IOException {
        long startTime = System.nanoTime();
        int result = this.in.read(position, buffer, offset, length);
        long endTime = System.nanoTime();
        recordRead(startTime, endTime, result == -1 ? 0 : result);
        return result;
    }

    @Override
    public void readFully(long position, byte[] buffer) throws IOException {
        long startTime = System.nanoTime();
        this.in.readFully(position, buffer);
        long endTime = System.nanoTime();
        recordRead(startTime, endTime, buffer.length);
    }

    @Override
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
        long startTime = System.nanoTime();
        this.in.readFully(position, buffer, offset, length);
        long endTime = System.nanoTime();
        recordRead(startTime, endTime, length);
    }

    // Helper methods

    /**
     * Reports a completed read to the aggregator. The end time is passed in
     * rather than taken here so that determining the datanode host name
     * (reflection setup, reverse lookups) is not attributed to the read.
     *
     * @param startTime
     *            {@link System#nanoTime()} before the read
     * @param endTime
     *            {@link System#nanoTime()} after the read
     * @param bytesRead
     *            number of bytes to report, 0 on end of stream
     */
    private void recordRead(long startTime, long endTime, int bytesRead) {
        String datanodeHostname = getDatanodeHostNameString();
        // datanodeHostname is never null, this.hostname is null if the
        // system property is not set
        boolean local = datanodeHostname.equals(this.hostname) || "localhost".equals(datanodeHostname);
        this.aggregator.aggregateReadDataOperationStatistics(OperationSource.SFS, OperationCategory.READ, startTime,
                endTime, this.fd, bytesRead, local);
    }

    /**
     * Reports a completed non-read operation to the aggregator, unless this
     * stream was configured to skip those.
     *
     * @param startTime
     *            {@link System#nanoTime()} before the operation
     */
    private void recordOther(long startTime) {
        if (!this.skipOther) {
            this.aggregator.aggregateOperationStatistics(OperationSource.SFS, OperationCategory.OTHER, startTime,
                    System.nanoTime(), this.fd);
        }
    }

    /**
     * Gets the host name of the datanode that was last read from. Should be
     * called after the first read operation has been performed, because the
     * way of obtaining the datanode is chosen once, on the first call.
     * <p>
     * The name reported by the stream is stripped of its port, passed through
     * {@link InetAddress#getByName(String)} and
     * {@link InetAddress#getHostName()} to handle cases where it is an IP,
     * and the result is cached.
     * <p>
     * NOTE(review): according to the documentation of
     * {@link InetAddress#getByName(String)}, an empty host name yields the
     * loopback address. A stream without datanode information is therefore
     * presumably reported under the loopback's host name and counted as
     * local by the callers; confirm that this is intended.
     *
     * @return the host name of the datanode, or the empty string if the host
     *         is unknown; never {@code null}
     */
    private String getDatanodeHostNameString() {
        if (this.datanodeHostnameSupplier == null) {
            this.datanodeHostnameSupplier = createDatanodeHostnameSupplier();
        }

        String dnHostname = this.datanodeHostnameSupplier.get();
        if (dnHostname == null) {
            // e.g. a reflectively bound method returning null
            dnHostname = "";
        }

        // Plain get/put instead of computeIfAbsent so that no lookup runs
        // while the map is locked; a duplicate lookup by a concurrent
        // reader is harmless.
        String cachedHostname = HOSTNAME_CACHE.get(dnHostname);
        if (cachedHostname == null) {
            try {
                // strip port if necessary
                int portIndex = dnHostname.indexOf(":");
                String host = portIndex == -1 ? dnHostname : dnHostname.substring(0, portIndex);
                cachedHostname = InetAddress.getByName(host).getHostName();
            } catch (UnknownHostException e) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Could not determine hostname for " + dnHostname, e);
                }
                cachedHostname = "";
            }
            HOSTNAME_CACHE.put(dnHostname, cachedHostname);
        }
        return cachedHostname;
    }

    /**
     * Chooses how to obtain the host name of the datanode currently being
     * read from: directly from Hadoop if the wrapped stream is an
     * {@link HdfsDataInputStream}, otherwise via a method named
     * {@code getCurrentDatanodeHostName} declared by the class of the wrapped
     * stream or of the stream it wraps in turn.
     *
     * @return a supplier of the datanode host name; supplies the empty string
     *         if no datanode information is available
     */
    private Supplier<String> createDatanodeHostnameSupplier() {
        if (this.in instanceof HdfsDataInputStream) {
            // call Hadoop's method directly
            final HdfsDataInputStream hdfsIn = (HdfsDataInputStream) this.in;
            if (hdfsIn.getCurrentDatanode() != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Using datanodeHostNameSupplier from Hadoop.");
                }
                return () -> hdfsIn.getCurrentDatanode().getHostName();
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("datanodeHostNameSupplier from Hadoop has no DataNode information.");
            }
            return () -> "";
        }

        try {
            // Check if there's an appropriately named method available
            // that returns the hostname of the current node that is
            // being read from. Using the lambda factory provides almost
            // direct invocation performance.
            MethodHandles.Lookup methodHandlesLookup = MethodHandles.lookup();

            // try this stream or the one it wraps
            Method getCurrentDatanodeHostNameMethod;
            InputStream bindToStream;
            try {
                getCurrentDatanodeHostNameMethod = this.in.getClass()
                        .getDeclaredMethod("getCurrentDatanodeHostName");
                bindToStream = this.in;
            } catch (NoSuchMethodException e) {
                bindToStream = this.in.getWrappedStream();
                getCurrentDatanodeHostNameMethod = bindToStream.getClass()
                        .getDeclaredMethod("getCurrentDatanodeHostName");
            }

            MethodHandle datanodeHostNameSupplierTarget = LambdaMetafactory.metafactory(methodHandlesLookup,
                    "get", MethodType.methodType(Supplier.class, bindToStream.getClass()),
                    MethodType.methodType(Object.class),
                    methodHandlesLookup.unreflect(getCurrentDatanodeHostNameMethod),
                    MethodType.methodType(Object.class)).getTarget();

            @SuppressWarnings("unchecked")
            Supplier<String> supplier = (Supplier<String>) datanodeHostNameSupplierTarget.bindTo(bindToStream)
                    .invoke();

            if (LOG.isDebugEnabled()) {
                LOG.debug("Using 'getCurrentDatanodeHostName' as datanodeHostNameSupplier.");
            }
            return supplier;
        } catch (Throwable t) {
            // MethodHandle.invoke is declared to throw Throwable; any
            // failure simply means no datanode information is available
            if (LOG.isDebugEnabled()) {
                LOG.debug("No datanodeHostNameSupplier available.", t);
            }
            return () -> "";
        }
    }

}