com.simiacryptus.mindseye.layers.cudnn.ImgTileAssemblyLayer.java Source code

Introduction

Here is the source code for com.simiacryptus.mindseye.layers.cudnn.ImgTileAssemblyLayer.java
Source

/*
 * Copyright (c) 2018 by Andrew Charneski.
 *
 * The author licenses this file to you under the
 * Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance
 * with the License.  You may obtain a copy
 * of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.simiacryptus.mindseye.layers.cudnn;

import com.google.gson.JsonObject;
import com.simiacryptus.mindseye.lang.*;
import com.simiacryptus.mindseye.lang.cudnn.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.*;
import java.util.stream.Stream;

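/**
 * Assembles a grid of image tiles into a single image. The inputs are consumed in row-major order
 * ({@code rows} x {@code columns}); tiles in a row are placed side by side and rows are stacked vertically.
 * The output has the same number of color bands as the first tile.
 */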
@SuppressWarnings("serial")
public class ImgTileAssemblyLayer extends LayerBase implements MultiPrecision<ImgTileAssemblyLayer> {
    /** Class logger. */
    private static final Logger log = LoggerFactory.getLogger(ImgTileAssemblyLayer.class);

    /** Number of tiles in each row of the grid. */
    private int columns;
    /** Number of tile rows in the grid. */
    private int rows;
    /** Numeric precision used for GPU buffers and tensor descriptors; defaults to {@link Precision#Double}. */
    private Precision precision = Precision.Double;
    /**
     * When true, and the core settings are not single-threaded, the per-tile copy and backprop tasks are run on
     * parallel streams.
     */
    private boolean parallel;

    /**
     * Creates a layer with no grid configured; {@code rows} and {@code columns} keep their zero defaults.
     */
    private ImgTileAssemblyLayer() {
        super();
    }

    /**
     * Creates a layer that assembles a grid of tiles.
     *
     * @param columns the number of tiles in each row of the grid
     * @param rows    the number of tile rows in the grid
     */
    public ImgTileAssemblyLayer(int columns, int rows) {
        super();
        this.rows = rows;
        this.columns = columns;
    }

    /**
     * Instantiates a new Img eval key.
     *
     * @param json the json
     * @param rs   the rs
     */
    protected ImgTileAssemblyLayer(@Nonnull final JsonObject json, Map<CharSequence, byte[]> rs) {
        super(json);
        columns = json.get("columns").getAsInt();
        rows = json.get("rows").getAsInt();
        this.parallel = json.get("parallel").getAsBoolean();
        this.precision = Precision.valueOf(json.getAsJsonPrimitive("precision").getAsString());
    }

    /**
     * Deserialization entry point: builds a layer from its JSON form by delegating to the JSON constructor.
     *
     * @param json the serialized layer
     * @param rs   the resource map, forwarded to the constructor
     * @return the restored layer
     */
    public static ImgTileAssemblyLayer fromJson(@Nonnull final JsonObject json, Map<CharSequence, byte[]> rs) {
        final ImgTileAssemblyLayer restored = new ImgTileAssemblyLayer(json, rs);
        return restored;
    }

    /**
     * Converts this layer, via {@code as(...)}, to the
     * {@code com.simiacryptus.mindseye.layers.java.ImgTileAssemblyLayer} implementation. Used as the fallback when
     * CUDA is not enabled.
     *
     * @return the compatibility layer
     */
    @Nonnull
    public Layer getCompatibilityLayer() {
        final Layer javaEquivalent = as(com.simiacryptus.mindseye.layers.java.ImgTileAssemblyLayer.class);
        return javaEquivalent;
    }

    /**
     * Assembles the input tiles into one output image on the GPU and returns a result whose backpropagation
     * routine slices the incoming gradient back into one gradient per tile.
     * <p>
     * Inputs are read in row-major order, i.e. tile ({@code row}, {@code col}) is
     * {@code inObj[row * columns + col]}. When {@code CudaSystem} is not enabled the call is delegated to the
     * compatibility layer, and a single input is passed straight through unchanged.
     * <p>
     * NOTE(review): there is no check that {@code inObj.length} covers {@code rows * columns}; the loops below
     * index that many entries.
     *
     * @param inObj the tile results
     * @return the assembled result, or {@code inObj[0]} itself when exactly one input is supplied
     */
    @Nullable
    @Override
    public Result evalAndFree(@Nonnull final Result... inObj) {
        // CUDA not enabled: delegate to the compatibility layer.
        if (!CudaSystem.isEnabled())
            return getCompatibilityLayer().evalAndFree(inObj);
        // A single tile needs no assembly; hand the input back as-is.
        if (1 == inObj.length) {
            return inObj[0];
        }
        // Only used by the assertion below.
        int[] inputDimensions = inObj[0].getData().getDimensions();
        assert 3 == inputDimensions.length;
        // Item count of the first tile's data; used as the leading dimension of every descriptor below.
        final int length = inObj[0].getData().length();
        int[] outputDims = getOutputDims(inObj);
        final TensorList outputData = CudaSystem.run(gpu -> {
            assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
            assert outputDims[0] > 0;
            assert outputDims[1] > 0;
            assert outputDims[2] > 0;
            // One buffer for the whole assembled batch; size computed in long arithmetic.
            @Nonnull
            final CudaMemory outputBuffer = gpu.allocate(
                    (long) length * outputDims[2] * outputDims[1] * outputDims[0] * precision.size,
                    MemoryType.Managed.normalize(), false);
            // totalWidth is tracked in this loop but never read afterwards; the output size comes from outputDims.
            int totalWidth = 0;
            int totalHeight = 0;
            int inputIndex = 0;
            List<CopyParams> copies = new ArrayList<>();
            // Walk the grid and record, for each tile, the position of its top-left corner in the output.
            for (int row = 0; row < rows; row++) {
                int positionX = 0;
                int rowHeight = 0;
                for (int col = 0; col < columns; col++) {
                    int[] tileDimensions = inObj[inputIndex].getData().getDimensions();
                    // A row is as tall as its tallest tile.
                    rowHeight = Math.max(rowHeight, tileDimensions[1]);
                    copies.add(new CopyParams(gpu, inObj, outputBuffer, length, outputDims, tileDimensions,
                            inputIndex, positionX, totalHeight));
                    positionX += tileDimensions[0];
                    inputIndex += 1;
                    assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                }
                totalHeight += rowHeight;
                totalWidth = Math.max(totalWidth, positionX);
            }
            assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
            // Execute the recorded copies, in parallel only if both the global setting and this layer allow it.
            Stream<CopyParams> stream = copies.stream();
            if (!CoreSettings.INSTANCE().isSingleThreaded() && parallel)
                stream = stream.parallel();
            stream.forEach(this::copy);
            // Release one reference on each input's data now that the copies have run.
            // NOTE(review): the backprop closure below still calls getData().getDimensions() on these inputs —
            // confirm that remains valid after this release.
            Arrays.stream(inObj).forEach(r -> r.getData().freeRef());
            // Wrap the filled buffer as a tensor list with the assembled dimensions.
            CudaDevice.CudaTensorDescriptor descriptor = gpu.newTensorDescriptor(precision, length, outputDims[2],
                    outputDims[1], outputDims[0]);
            CudaTensor ptr = CudaTensor.wrap(outputBuffer, descriptor, precision);
            return CudaTensorList.wrap(ptr, length, outputDims, precision);
            // The inputs' data objects are handed to CudaSystem.run as its second argument
            // (presumably as device-selection hints — TODO confirm).
        }, Arrays.stream(inObj).map(Result::getData).toArray());

        return new Result(outputData, (@Nonnull final DeltaSet<UUID> buffer, @Nonnull final TensorList error) -> {
            // Reject gradients whose dimensions or length differ from the forward output.
            if (!Arrays.equals(error.getDimensions(), outputData.getDimensions())) {
                throw new AssertionError(Arrays.toString(error.getDimensions()) + " != "
                        + Arrays.toString(outputData.getDimensions()));
            }
            if (error.length() != outputData.length()) {
                throw new AssertionError(error.length() + " != " + outputData.length());
            }
            assert error.length() == length;

            int totalHeight = 0;
            int inputIndex = 0;
            List<BackpropParams> tasks = new ArrayList<>();
            // Repeat the forward grid walk so each tile gets the same position it was written to.
            for (int row = 0; row < rows; row++) {
                int positionX = 0;
                int rowHeight = 0;
                for (int col = 0; col < columns; col++) {
                    Result in = inObj[inputIndex];
                    int[] tileDimensions = in.getData().getDimensions();
                    rowHeight = Math.max(rowHeight, tileDimensions[1]);
                    // Only inputs that report isAlive() get a backprop task.
                    if (inObj[inputIndex].isAlive()) {
                        tasks.add(new BackpropParams(inObj, buffer, error, outputDims, tileDimensions, length,
                                positionX, totalHeight, inputIndex));
                    }
                    positionX += tileDimensions[0];
                    inputIndex += 1;
                }
                totalHeight += rowHeight;
            }
            Stream<BackpropParams> stream = tasks.stream();
            if (!CoreSettings.INSTANCE().isSingleThreaded() && parallel)
                stream = stream.parallel();
            stream.forEach(this::backprop);
        }) {

            // Releases a reference on every input result.
            @Override
            protected void _free() {
                Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
            }

            // The assembled result is alive if any of its inputs is.
            @Override
            public boolean isAlive() {
                return Arrays.stream(inObj).anyMatch(x -> x.isAlive());
            }
        };
    }

    /**
     * Extracts one tile's slice of the output gradient and accumulates it into the corresponding input.
     * <p>
     * The tile's forward position is negated so that the shared copy routine reads from that position inside the
     * gradient rather than writing at it.
     *
     * @param backpropParams the tile, its position in the output, and the gradient to slice
     */
    public void backprop(final BackpropParams backpropParams) {
        final TensorList tileGradient = CudaSystem.run(gpu -> {
            final int shiftX = -backpropParams.positionX;
            final int shiftY = -backpropParams.totalHeight;
            final CudaTensor slice = copy(gpu, backpropParams.error, backpropParams.tileDimensions,
                    backpropParams.outputDims, backpropParams.length, shiftX, shiftY);
            return CudaTensorList.wrap(slice, backpropParams.length, backpropParams.tileDimensions, precision);
        }, backpropParams.error);
        final Result target = backpropParams.inObj[backpropParams.inputIndex];
        target.accumulate(backpropParams.buffer, tileGradient);
    }

    /**
     * Copies a tile-sized window out of a larger tensor list into a newly allocated buffer and wraps it as a
     * tensor.
     *
     * @param gpu            the GPU handle to issue the work on
     * @param error          the tensor list to read from, laid out with {@code outputDims}
     * @param tileDimensions the dimensions of the buffer to allocate and fill
     * @param outputDims     the dimensions of {@code error}
     * @param length         the item count used as the leading descriptor dimension
     * @param positionX      the horizontal offset forwarded to the low-level copy
     * @param positionY      the vertical offset forwarded to the low-level copy
     * @return a tensor over the new buffer, described with {@code tileDimensions}
     */
    public CudaTensor copy(final CudnnHandle gpu, final TensorList error, final int[] tileDimensions,
            final int[] outputDims, final int length, final int positionX, final int positionY) {
        final CudaTensor sourceTensor = gpu.getTensor(error, precision, MemoryType.Device, false);
        // Size is computed in long arithmetic, left to right, exactly as the element counts multiply out.
        final long tileBytes = (long) length * tileDimensions[2] * tileDimensions[1] * tileDimensions[0]
                * precision.size;
        final CudaMemory tileBuffer = gpu.allocate(tileBytes, MemoryType.Managed.normalize(), false);
        copy(gpu, length, outputDims, sourceTensor, tileDimensions, tileBuffer, positionX, positionY);
        sourceTensor.freeRef();
        final CudaDevice.CudaTensorDescriptor tileDescriptor = gpu.newTensorDescriptor(precision, length,
                tileDimensions[2], tileDimensions[1], tileDimensions[0]);
        return CudaTensor.wrap(tileBuffer, tileDescriptor, precision);
    }

    /**
     * Executes one recorded forward copy: writes the tile {@code inObj[inputIndex]} into the shared output buffer
     * at ({@code positionX}, {@code totalHeight}).
     * <p>
     * {@code initThread()} is called on the handle first because this method may be invoked from a parallel
     * stream.
     *
     * @param copyParams the tile to copy and where to place it
     */
    public void copy(final CopyParams copyParams) {
        final CudnnHandle handle = copyParams.gpu;
        handle.initThread();
        assert CudaDevice.isThreadDeviceId(handle.getDeviceId());
        final Result tile = copyParams.inObj[copyParams.inputIndex];
        final CudaTensor tileTensor = handle.getTensor(tile.getData(), precision, MemoryType.Device, false);
        copy(handle, copyParams.length, copyParams.tileDimensions, tileTensor, copyParams.outputDims,
                copyParams.outputBuffer, copyParams.positionX, copyParams.totalHeight);
        tileTensor.freeRef();
    }

    /**
     * Computes the dimensions of the assembled image: the width is that of the widest row (sum of its tile
     * widths), the height is the sum of each row's tallest tile, and the band count is taken from the first
     * input.
     *
     * @param inObj the tiles in row-major order
     * @return {@code { width, height, bands }}
     */
    private int[] getOutputDims(final Result[] inObj) {
        final int bandCount = inObj[0].getData().getDimensions()[2];
        int widest = 0;
        int heightSum = 0;
        int tile = 0;
        for (int r = 0; r < rows; r++) {
            int rowWidth = 0;
            int tallest = 0;
            for (int c = 0; c < columns; c++, tile++) {
                final int[] dims = inObj[tile].getData().getDimensions();
                tallest = Math.max(tallest, dims[1]);
                rowWidth += dims[0];
            }
            heightSum += tallest;
            widest = Math.max(widest, rowWidth);
        }
        return new int[] { widest, heightSum, bandCount };
    }

    /**
     * Copies the region where a source tensor, shifted by ({@code positionX}, {@code positionY}), overlaps a
     * destination buffer, using {@code cudnnTransformTensor} on strided views of both.
     * <p>
     * A positive position moves the write cursor inside the destination; a zero or negative position moves the
     * read cursor inside the source by its absolute value. The destination is addressed as a densely packed
     * buffer of {@code destinationDimensions}; the source is addressed with the strides of its own descriptor.
     * The transform is issued with both scaling factors set to {@code 1.0}.
     *
     * @param gpu                   the GPU handle to issue the transform on
     * @param length                the item count used as the leading dimension of both views
     * @param sourceDimensions      the source dimensions, {@code { width, height, bands }}
     * @param source                the tensor to read from
     * @param destinationDimensions the destination dimensions, {@code { width, height, bands }}
     * @param destination           the memory to write into
     * @param positionX             the horizontal position of the source within the destination
     * @param positionY             the vertical position of the source within the destination
     * @return the dimensions of the region that was copied
     * @throws IllegalArgumentException if either dimension array does not have length 3, or the band counts
     *                                  differ
     */
    public int[] copy(@Nonnull CudnnHandle gpu, int length, @Nonnull int[] sourceDimensions,
            @Nonnull CudaTensor source, @Nonnull int[] destinationDimensions, @Nonnull CudaMemory destination,
            int positionX, int positionY) {
        if (3 != sourceDimensions.length)
            throw new IllegalArgumentException("inputDimensions.length");
        if (3 != destinationDimensions.length)
            throw new IllegalArgumentException("dimOut.length");
        int bands = sourceDimensions[2];
        if (bands != destinationDimensions[2])
            throw new IllegalArgumentException(String.format("%d != %d", bands, destinationDimensions[2]));
        // Size of the overlap between the shifted source and the destination.
        @Nonnull
        final int[] viewDim = getViewDimensions(sourceDimensions, destinationDimensions,
                new int[] { positionX, positionY, 0 });
        // View over the source: overlap-sized, but stepping with the source tensor's own strides.
        @Nonnull
        final CudaDevice.CudaTensorDescriptor sourceViewDescriptor = gpu.newTensorDescriptor(precision, //
                length, //
                viewDim[2], //
                viewDim[1], //
                viewDim[0], //
                source.descriptor.nStride, //
                source.descriptor.cStride, //
                source.descriptor.hStride, //
                source.descriptor.wStride);
        // View over the destination: overlap-sized, stepping as a densely packed destinationDimensions buffer.
        @Nonnull
        final CudaDevice.CudaTensorDescriptor destinationViewDescriptor = gpu.newTensorDescriptor(precision, //
                length, //
                viewDim[2], //
                viewDim[1], //
                viewDim[0], //
                destinationDimensions[2] * destinationDimensions[1] * destinationDimensions[0], //
                destinationDimensions[1] * destinationDimensions[0], //
                destinationDimensions[0], //
                1);
        // Element offsets of the first copied element in each buffer.
        int sourceOffset = 0;
        int destinationOffset = 0;

        if (positionX > 0) {
            destinationOffset += Math.abs(positionX);
        } else {
            sourceOffset += source.descriptor.wStride * Math.abs(positionX);
        }
        if (positionY > 0) {
            destinationOffset += destinationDimensions[0] * Math.abs((positionY));
        } else {
            sourceOffset += source.descriptor.hStride * (Math.abs(positionY));
        }
        assert sourceOffset >= 0;
        assert destinationOffset >= 0;
        assert sourceOffset + Tensor.length(viewDim) <= (source.descriptor.nStride * length);
        assert destinationOffset + Tensor.length(viewDim) <= Tensor.length(destinationDimensions);

        // Byte offsets are computed in long arithmetic: an element offset that fits in an int can still exceed
        // Integer.MAX_VALUE once multiplied by the element size.
        final long sourceByteOffset = (long) sourceOffset * precision.size;
        final long destinationByteOffset = (long) destinationOffset * precision.size;

        CudaMemory sourceMemory = source.getMemory(gpu);
        CudaSystem.handle(gpu.cudnnTransformTensor(precision.getPointer(1.0), sourceViewDescriptor.getPtr(),
                sourceMemory.getPtr().withByteOffset(sourceByteOffset), precision.getPointer(1.0),
                destinationViewDescriptor.getPtr(),
                destination.getPtr().withByteOffset(destinationByteOffset)));
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        sourceMemory.dirty();
        destination.dirty();
        sourceMemory.freeRef();
        sourceViewDescriptor.freeRef();
        destinationViewDescriptor.freeRef();
        return viewDim;

    }

    /**
     * Computes, per dimension, the extent of the region where the source (shifted by {@code offset}) overlaps
     * the destination: {@code min(source + offset, destination) - max(offset, 0)}.
     * <p>
     * Exactly three dimensions are processed. The result is not clamped, so a dimension with no overlap yields
     * a zero or negative extent.
     *
     * @param sourceDimensions      the source dimensions (at least 3 entries)
     * @param destinationDimensions the destination dimensions (at least 3 entries)
     * @param offset                the position of the source within the destination, per dimension
     * @return a new 3-element array holding the overlap extent in each dimension
     */
    @Nonnull
    public int[] getViewDimensions(int[] sourceDimensions, int[] destinationDimensions, int[] offset) {
        final int[] viewDim = new int[3];
        // A plain loop: three elements are far too few to justify a parallel fill.
        for (int i = 0; i < viewDim.length; i++) {
            final int upper = Math.min(sourceDimensions[i] + offset[i], destinationDimensions[i]);
            final int lower = Math.max(offset[i], 0);
            viewDim[i] = upper - lower;
        }
        return viewDim;
    }

    /**
     * Serializes this layer: starts from the base JSON stub and adds the {@code rows}, {@code columns},
     * {@code precision} and {@code parallel} properties.
     *
     * @param resources      the resource map; not used by this layer
     * @param dataSerializer the data serializer; not used by this layer
     * @return the JSON form of this layer
     */
    @Nonnull
    @Override
    public JsonObject getJson(Map<CharSequence, byte[]> resources, DataSerializer dataSerializer) {
        final JsonObject serialized = super.getJsonStub();
        serialized.addProperty("rows", this.rows);
        serialized.addProperty("columns", this.columns);
        final String precisionName = this.precision.name();
        serialized.addProperty("precision", precisionName);
        final boolean parallelFlag = isParallel();
        serialized.addProperty("parallel", parallelFlag);
        return serialized;
    }

    /**
     * Returns the layer's state arrays; this layer has none, so the list is always empty.
     *
     * @return an empty list
     */
    @Nonnull
    @Override
    public List<double[]> state() {
        final List<double[]> noState = Arrays.asList();
        return noState;
    }

    /**
     * Returns the numeric precision this layer uses for its GPU buffers and descriptors.
     *
     * @return the current precision
     */
    @Override
    public Precision getPrecision() {
        return this.precision;
    }

    /**
     * Sets the numeric precision this layer uses for its GPU buffers and descriptors.
     *
     * @param precision the new precision
     * @return this layer, for call chaining
     */
    @Nonnull
    @Override
    public ImgTileAssemblyLayer setPrecision(final Precision precision) {
        final ImgTileAssemblyLayer self = this;
        self.precision = precision;
        return self;
    }

    /**
     * Reports whether per-tile work may run on parallel streams.
     *
     * @return the value of the {@code parallel} flag
     */
    public boolean isParallel() {
        return this.parallel;
    }

    /**
     * Enables or disables running per-tile work on parallel streams.
     *
     * @param parallel the new value of the flag
     * @return this layer, for call chaining
     */
    public ImgTileAssemblyLayer setParallel(final boolean parallel) {
        final ImgTileAssemblyLayer self = this;
        self.parallel = parallel;
        return self;
    }

    /**
     * Immutable description of one forward copy: which input tile to read and where it lands in the shared
     * output buffer.
     */
    private static class CopyParams {
        /** The GPU handle the copy is issued on. */
        public final CudnnHandle gpu;
        /** All layer inputs; the tile to copy is {@code inObj[inputIndex]}. */
        @Nonnull
        public final Result[] inObj;
        /** The buffer that receives every tile of the assembled output. */
        public final CudaMemory outputBuffer;
        /** The item count used as the leading descriptor dimension. */
        public final int length;
        /** The dimensions of the assembled output. */
        public final int[] outputDims;
        /** The dimensions of the tile being copied. */
        public final int[] tileDimensions;
        /** The index of the tile within {@code inObj}. */
        public final int inputIndex;
        /** The horizontal position of the tile within the output. */
        public final int positionX;
        /** The vertical position of the tile within the output: the summed height of the rows above it. */
        public final int totalHeight;

        private CopyParams(final CudnnHandle gpu, @Nonnull final Result[] inObj, final CudaMemory outputBuffer,
                final int length, final int[] outputDims, final int[] tileDimensions, final int inputIndex,
                final int positionX, final int totalHeight) {
            this.gpu = gpu;
            this.inObj = inObj;
            this.outputBuffer = outputBuffer;
            this.length = length;
            this.outputDims = outputDims;
            this.tileDimensions = tileDimensions;
            this.inputIndex = inputIndex;
            this.positionX = positionX;
            this.totalHeight = totalHeight;
        }

    }

    /**
     * Immutable description of one backpropagation task: which input receives a gradient and which window of
     * the output gradient belongs to it.
     */
    private static class BackpropParams {
        /** All layer inputs; the gradient is accumulated into {@code inObj[inputIndex]}. */
        @Nonnull
        public final Result[] inObj;
        /** The delta set passed through to the input's {@code accumulate} call. */
        @Nonnull
        public final DeltaSet<UUID> buffer;
        /** The gradient of the assembled output. */
        @Nonnull
        public final TensorList error;
        /** The dimensions of the assembled output. */
        public final int[] outputDims;
        /** The dimensions of the tile whose gradient is extracted. */
        public final int[] tileDimensions;
        /** The item count used as the leading descriptor dimension. */
        public final int length;
        /** The horizontal position of the tile within the output. */
        public final int positionX;
        /** The vertical position of the tile within the output: the summed height of the rows above it. */
        public final int totalHeight;
        /** The index of the tile within {@code inObj}. */
        public final int inputIndex;

        private BackpropParams(@Nonnull final Result[] inObj, @Nonnull final DeltaSet<UUID> buffer,
                @Nonnull final TensorList error, final int[] outputDims, final int[] tileDimensions,
                final int length, final int positionX, final int totalHeight, final int inputIndex) {
            this.inputIndex = inputIndex;
            this.totalHeight = totalHeight;
            this.positionX = positionX;
            this.length = length;
            this.tileDimensions = tileDimensions;
            this.outputDims = outputDims;
            this.error = error;
            this.buffer = buffer;
            this.inObj = inObj;
        }

    }
}