org.datavec.image.loader.CifarLoader.java Source code

Introduction

Here is the source code for org.datavec.image.loader.CifarLoader.java, a DataVec loader for the CIFAR-10 image dataset.

Source

/*
 *  * Copyright 2016 Skymind, Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 */

package org.datavec.image.loader;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.bytedeco.javacpp.opencv_core;
import org.bytedeco.javacv.OpenCVFrameConverter;
import org.datavec.api.berkeley.Pair;
import org.datavec.image.data.ImageWritable;
import org.datavec.image.transform.ColorConversionTransform;
import org.datavec.image.transform.EqualizeHistTransform;
import org.datavec.image.transform.ImageTransform;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.accum.Sum;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.util.FeatureUtil;

import java.io.*;
import java.nio.ByteBuffer;
import java.util.*;

import static org.bytedeco.javacpp.opencv_core.CV_8UC;
import static org.bytedeco.javacpp.opencv_core.Mat;
import static org.bytedeco.javacpp.opencv_imgproc.COLOR_BGR2YCrCb;

/**
 * CifarLoader is a loader specific to the CIFAR-10 dataset.
 *
 * Reference: Learning Multiple Layers of Features from Tiny Images, Alex Krizhevsky, 2009.
 *
 * A special preProcessor is used to normalize the dataset, based on Sergey Zagoruyko's example:
 * https://github.com/szagoruyko/cifar.torch
 */
public class CifarLoader extends NativeImageLoader implements Serializable {
    public final static int NUM_TRAIN_IMAGES = 50000;
    public final static int NUM_TEST_IMAGES = 10000;
    public final static int NUM_LABELS = 10; // Note: 6000 images per class
    public final static int HEIGHT = 32;
    public final static int WIDTH = 32;
    public final static int CHANNELS = 3;
    public final static int BYTEFILELEN = 3073;

    public static String dataBinUrl = "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz";
    public static String localDir = "cifar";
    public static String dataBinFile = "cifar-10-batches-bin";
    public static File fullDir = new File(BASE_DIR, FilenameUtils.concat(localDir, dataBinFile));
    public static File meanVarPath = new File(fullDir, "meanVarPath.txt");

    protected static String labelFileName = "batches.meta.txt";
    protected static InputStream inputStream;
    protected static InputStream trainInputStream;
    protected static InputStream testInputStream;
    protected static List<DataSet> inputBatched;
    protected static List<String> labels = new ArrayList<>();

    public static String[] TRAINFILENAMES = { "data_batch_1.bin", "data_batch_2.bin", "data_batch_3.bin",
            "data_batch_4.bin", "data_batch5.bin" };
    public static String TESTFILENAME = "test_batch.bin";
    protected static String trainFilesSerialized = FilenameUtils.concat(fullDir.toString(),
            "cifar_train_serialized");
    protected static String testFilesSerialized = FilenameUtils.concat(fullDir.toString(),
            "cifar_test_serialized.ser");
    protected static boolean train = true;
    public static boolean useSpecialPreProcessCifar = false;
    public static Map<String, String> cifarDataMap = new HashMap<>();

    protected static int height = HEIGHT;
    protected static int width = WIDTH;
    protected static int channels = CHANNELS;
    protected static long seed = System.currentTimeMillis();
    protected static boolean shuffle = true;
    protected int numExamples = 0;
    protected static int numToConvertDS = 10000; // Each file holds 10000 images; caps how many are preprocessed per load
    protected double uMean = 0;
    protected double uStd = 0;
    protected double vMean = 0;
    protected double vStd = 0;
    protected boolean meanStdStored = false;
    protected int loadDSIndex = 0;
    protected DataSet loadDS = new DataSet();
    protected int fileNum = 0;

    public CifarLoader() {
        this(height, width, channels, null, train, useSpecialPreProcessCifar, fullDir, seed, shuffle);
    }

    public CifarLoader(boolean train) {
        this(height, width, channels, null, train, useSpecialPreProcessCifar, fullDir, seed, shuffle);
    }

    public CifarLoader(boolean train, File fullPath) {
        this(height, width, channels, null, train, useSpecialPreProcessCifar, fullPath, seed, shuffle);
    }

    public CifarLoader(int height, int width, int channels, boolean train, boolean useSpecialPreProcessCifar) {
        this(height, width, channels, null, train, useSpecialPreProcessCifar, fullDir, seed, shuffle);
    }

    public CifarLoader(int height, int width, int channels, ImageTransform imgTransform, boolean train,
            boolean useSpecialPreProcessCifar) {
        this(height, width, channels, imgTransform, train, useSpecialPreProcessCifar, fullDir, seed, shuffle);
    }

    public CifarLoader(int height, int width, int channels, ImageTransform imgTransform, boolean train,
            boolean useSpecialPreProcessCifar, boolean shuffle) {
        this(height, width, channels, imgTransform, train, useSpecialPreProcessCifar, fullDir, seed, shuffle);
    }

    public CifarLoader(int height, int width, int channels, ImageTransform imgTransform, boolean train,
            boolean useSpecialPreProcessCifar, File fullPath, long seed, boolean shuffle) {
        super(height, width, channels, imgTransform);
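        // Note: the fields assigned below are static, so constructing a new
        // loader reconfigures these settings for all CifarLoader instances.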
        this.height = height;
        this.width = width;
        this.channels = channels;
        this.train = train;
        this.useSpecialPreProcessCifar = useSpecialPreProcessCifar;
        this.fullDir = fullPath;
        this.seed = seed;
        this.shuffle = shuffle;
        load();
    }

    @Override
    public INDArray asRowVector(File f) throws IOException {
        throw new UnsupportedOperationException();
    }

    @Override
    public INDArray asRowVector(InputStream inputStream) throws IOException {
        throw new UnsupportedOperationException();
    }

    @Override
    public INDArray asMatrix(File f) throws IOException {
        throw new UnsupportedOperationException();
    }

    @Override
    public INDArray asMatrix(InputStream inputStream) throws IOException {
        throw new UnsupportedOperationException();
    }

    public void generateMaps() {
        cifarDataMap.put("filesFilename", new File(dataBinUrl).getName());
        cifarDataMap.put("filesURL", dataBinUrl);
        cifarDataMap.put("filesFilenameUnzipped", dataBinFile);
    }

    private void defineLabels() {
        File path = new File(fullDir, labelFileName);
        // try-with-resources closes the reader even if readLine throws
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            String line;

            while ((line = br.readLine()) != null) {
                labels.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public void load() {
        if (!cifarRawFilesExist() && !fullDir.exists()) {
            generateMaps();
            fullDir.mkdir();

            log.info("Downloading {}...", localDir);
            downloadAndUntar(cifarDataMap, new File(BASE_DIR, localDir));
        }
        try {
            Collection<File> subFiles = FileUtils.listFiles(fullDir, new String[] { "bin" }, true);
            Iterator<File> trainIter = subFiles.iterator();
            trainInputStream = new SequenceInputStream(new FileInputStream(trainIter.next()),
                    new FileInputStream(trainIter.next()));
            while (trainIter.hasNext()) {
                File nextFile = trainIter.next();
                if (!TESTFILENAME.equals(nextFile.getName()))
                    trainInputStream = new SequenceInputStream(trainInputStream, new FileInputStream(nextFile));
            }
            testInputStream = new FileInputStream(new File(fullDir, TESTFILENAME));
        } catch (Exception e) {
            e.printStackTrace();
        }

        if (labels.isEmpty())
            defineLabels();

        if (useSpecialPreProcessCifar && train && !cifarProcessedFilesExists()) {
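            // Convert each 10000-image training file once and cache the result
            // as a serialized DataSet so the preprocessing is not repeated.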
            for (int i = fileNum + 1; i <= (TRAINFILENAMES.length); i++) {
                inputStream = trainInputStream;
                DataSet result = convertDataSet(numToConvertDS);
                result.save(new File(trainFilesSerialized + i + ".ser"));
            }
            //            for (int i = 1; i <= (TRAINFILENAMES.length); i++){
            //                normalizeCifar(new File(trainFilesSerialized + i + ".ser"));
            //            }
            inputStream = testInputStream;
            DataSet result = convertDataSet(numToConvertDS);
            result.save(new File(testFilesSerialized));
            //            normalizeCifar(new File(testFilesSerialized));
        }
        setInputStream();
    }

    public boolean cifarRawFilesExist() {
        File f = new File(fullDir, TESTFILENAME);
        if (!f.exists())
            return false;

        for (String name : TRAINFILENAMES) {
            f = new File(fullDir, name);
            if (!f.exists())
                return false;
        }
        return true;
    }

    private boolean cifarProcessedFilesExists() {
        File f;
        if (train) {
            f = new File(trainFilesSerialized + 1 + ".ser");
            if (!f.exists())
                return false;
        } else {
            f = new File(testFilesSerialized);
            if (!f.exists())
                return false;
        }
        return true;
    }

    /**
     * Preprocess and store CIFAR data based on the successful Torch approach by Sergey Zagoruyko.
     * Reference: https://github.com/szagoruyko/cifar.torch
     */
    public opencv_core.Mat convertCifar(Mat orgImage) {
        numExamples++;
        Mat resImage = new Mat();
        OpenCVFrameConverter.ToMat converter = new OpenCVFrameConverter.ToMat();
        //        ImageTransform yuvTransform = new ColorConversionTransform(new Random(seed), COLOR_BGR2Luv);
        //        ImageTransform histEqualization = new EqualizeHistTransform(new Random(seed), COLOR_BGR2Luv);
        ImageTransform yuvTransform = new ColorConversionTransform(new Random(seed), COLOR_BGR2YCrCb);
        ImageTransform histEqualization = new EqualizeHistTransform(new Random(seed), COLOR_BGR2YCrCb);

        if (converter != null) {
            ImageWritable writable = new ImageWritable(converter.convert(orgImage));
            // TODO determine whether to normalize Y before the transform - OpenCV docs recommend it, but it is currently done after
            writable = yuvTransform.transform(writable); // Converts to a chroma color space to help emphasize image objects
            writable = histEqualization.transform(writable); // Equalizes values to further clarify the object of interest
            resImage = converter.convert(writable.getFrame());
        }

        return resImage;
    }

    /**
     * Normalize and store CIFAR data based on the successful Torch approach by Sergey Zagoruyko.
     * Reference: https://github.com/szagoruyko/cifar.torch
     */
    public void normalizeCifar(File fileName) {
        DataSet result = new DataSet();
        result.load(fileName);
        if (!meanStdStored && train) {
            uMean = Math.abs(uMean / numExamples);
            uStd = Math.sqrt(uStd);
            vMean = Math.abs(vMean / numExamples);
            vStd = Math.sqrt(vStd);
            // TODO find cleaner way to store and load (e.g. json or yaml)
            try {
                FileUtils.write(meanVarPath, uMean + "," + uStd + "," + vMean + "," + vStd);
            } catch (IOException e) {
                e.printStackTrace();
            }
            meanStdStored = true;
        } else if (uMean == 0 && meanStdStored) {
            try {
                String[] values = FileUtils.readFileToString(meanVarPath).split(",");
                uMean = Double.parseDouble(values[0]);
                uStd = Double.parseDouble(values[1]);
                vMean = Double.parseDouble(values[2]);
                vStd = Double.parseDouble(values[3]);

            } catch (IOException e) {
                e.printStackTrace();
            }
        }
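        // Scale the luma channel into [0, 1] and standardize both chroma channels
        // with the stored mean/std, following the cifar.torch recipe.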
        for (int i = 0; i < result.numExamples(); i++) {
            INDArray newFeatures = result.get(i).getFeatureMatrix();
            newFeatures.tensorAlongDimension(0, new int[] { 0, 2, 3 }).divi(255);
            newFeatures.tensorAlongDimension(1, new int[] { 0, 2, 3 }).subi(uMean).divi(uStd);
            newFeatures.tensorAlongDimension(2, new int[] { 0, 2, 3 }).subi(vMean).divi(vStd);
            result.get(i).setFeatures(newFeatures);
        }
        result.save(fileName);
    }

    public Pair<INDArray, opencv_core.Mat> convertMat(byte[] byteFeature) {
        INDArray label = FeatureUtil.toOutcomeVector(byteFeature[0], NUM_LABELS); // label is the first value in the 3073 byte record
        opencv_core.Mat image = new opencv_core.Mat(HEIGHT, WIDTH, CV_8UC(CHANNELS)); // remaining 3072 bytes are pixel features
        ByteBuffer imageData = image.createBuffer();

        for (int i = 0; i < HEIGHT * WIDTH; i++) {
            imageData.put(3 * i, byteFeature[i + 1 + 2 * height * width]); // blue
            imageData.put(3 * i + 1, byteFeature[i + 1 + height * width]); // green
            imageData.put(3 * i + 2, byteFeature[i + 1]); // red
        }
        //        if (useSpecialPreProcessCifar) {
        //            image = convertCifar(image);
        //        }

        return new Pair<>(label, image);
    }

    public DataSet convertDataSet(int num) {
        int batchNumCount = 0;
        List<DataSet> dataSets = new ArrayList<>();
        Pair<INDArray, opencv_core.Mat> matConversion;
        byte[] byteFeature = new byte[BYTEFILELEN];

        try {
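            // Each iteration consumes one 3073-byte record: 1 label byte + 3072 pixel bytes.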
            while (inputStream.read(byteFeature) != -1 && batchNumCount != num) {
                matConversion = convertMat(byteFeature);
                try {
                    dataSets.add(new DataSet(asMatrix(matConversion.getSecond()), matConversion.getFirst()));
                    batchNumCount++;
                } catch (Exception e) {
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        DataSet result = new DataSet();
        try {
            result = result.merge(dataSets);
        } catch (IllegalArgumentException e) {
            return result;
        }
        double uTempMean, vTempMean;
        for (DataSet data : result) {
            try {
                if (useSpecialPreProcessCifar) {
                    INDArray uChannel = data.getFeatures().tensorAlongDimension(1, new int[] { 0, 2, 3 });
                    INDArray vChannel = data.getFeatures().tensorAlongDimension(2, new int[] { 0, 2, 3 });
                    uTempMean = uChannel.mean(new int[] { 0, 2, 3 }).getDouble(0);
                    // TODO INDArray.var result is incorrect based on dimensions passed in thus using manual
                    uStd += varManual(uChannel, uTempMean);
                    uMean += uTempMean;
                    vTempMean = vChannel.mean(new int[] { 0, 2, 3 }).getDouble(0);
                    vStd += varManual(vChannel, vTempMean);
                    vMean += vTempMean;
                    data.setFeatures(data.getFeatureMatrix().div(255));
                } else {
                    // normalize if just input stream and not special preprocess
                    data.setFeatures(data.getFeatureMatrix().div(255));
                }
            } catch (IllegalArgumentException e) {
                throw new IllegalStateException(
                        "The number of channels must be 3 to apply the special CIFAR preprocessing.");
            }
        }
        if (shuffle && num > 1)
            result.shuffle(seed);
        return result;
    }

    public double varManual(INDArray x, double mean) {
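        // Manual population variance: mean of squared deviations from the given mean.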
        INDArray xSubMean = x.sub(mean);
        INDArray squared = xSubMean.muli(xSubMean);
        double accum = Nd4j.getExecutioner().execAndReturn(new Sum(squared)).getFinalResult().doubleValue();
        return accum / x.ravel().length();
    }

    public DataSet next(int batchSize) {
        return next(batchSize, 0);
    }

    public DataSet next(int batchSize, int exampleNum) {
        List<DataSet> temp = new ArrayList<>();
        DataSet result;
        if (cifarProcessedFilesExists() && useSpecialPreProcessCifar) {
            if (exampleNum == 0 || ((exampleNum / fileNum) == numToConvertDS && train)) {
                fileNum++;
                if (train)
                    loadDS.load(new File(trainFilesSerialized + fileNum + ".ser"));
                else
                    loadDS.load(new File(testFilesSerialized));
                // Shuffle all examples in file before batching happens also for each reset
                if (shuffle && batchSize > 1)
                    loadDS.shuffle(seed);
                loadDSIndex = 0;
                //          inputBatched = loadDS.batchBy(batchSize);
            }
            // TODO loading full train dataset when using cuda causes memory error - find way to load into list off gpu
            //            result = inputBatched.get(batchNum);
            for (int i = 0; i < batchSize; i++) {
                if (loadDS.get(loadDSIndex) != null)
                    temp.add(loadDS.get(loadDSIndex));
                else
                    break;
                loadDSIndex++;
            }
            if (temp.size() > 1)
                result = DataSet.merge(temp);
            else
                result = temp.get(0);
        } else {
            result = convertDataSet(batchSize);
        }
        return result;
    }

    public InputStream getInputStream() {
        return inputStream;
    }

    public void setInputStream() {
        if (train)
            inputStream = trainInputStream;
        else
            inputStream = testInputStream;
    }

    public List<String> getLabels() {
        return labels;
    }

    public void reset() {
        numExamples = 0;
        fileNum = 0;
        load();
    }

    public void train() {
        train = true;
        setInputStream();
    }

    public void test() {
        train = false;
        setInputStream();
        shuffle = false;
        numExamples = 0;
        fileNum = 0;
    }

}
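
For reference, here is a minimal sketch of how this loader might be used to pull a batch of training data. The class and method names come straight from the listing above; the batch size and printed fields are illustrative only, and the sketch assumes DataVec/ND4J versions contemporary with this source are on the classpath (constructing the loader downloads and unpacks the dataset on first use).

import org.datavec.image.loader.CifarLoader;
import org.nd4j.linalg.dataset.DataSet;

import java.util.Arrays;

public class CifarLoaderExample {
    public static void main(String[] args) {
        // true selects the 50000-image training set; height/width/channels default to 32x32x3
        CifarLoader loader = new CifarLoader(true);

        // Fetch the first 100 examples merged into a single DataSet
        DataSet batch = loader.next(100);

        System.out.println("Feature shape: " + Arrays.toString(batch.getFeatures().shape()));
        System.out.println("Labels: " + loader.getLabels());
    }
}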