de.lmu.ifi.dbs.jfeaturelib.utils.Extractor.java Source code

Java tutorial

Introduction

Here is the source code for de.lmu.ifi.dbs.jfeaturelib.utils.Extractor.java

Source

/*
 * This file is part of the JFeatureLib project: http://jfeaturelib.googlecode.com
 * JFeatureLib is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * JFeatureLib is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with JFeatureLib; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * 
 * You are kindly asked to refer to the papers of the according authors which 
 * should be mentioned in the Javadocs of the respective classes as well as the 
 * JFeatureLib project itself.
 * 
 * Hints how to cite the projects can be found at 
 * https://code.google.com/p/jfeaturelib/wiki/Citation
 */
package de.lmu.ifi.dbs.jfeaturelib.utils;

import de.lmu.ifi.dbs.jfeaturelib.Descriptor.Supports;
import de.lmu.ifi.dbs.jfeaturelib.LibProperties;
import de.lmu.ifi.dbs.jfeaturelib.features.FeatureDescriptor;
import de.lmu.ifi.dbs.utilities.Arrays2;
import ij.ImagePlus;
import ij.io.Opener;
import ij.process.ImageProcessor;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URISyntaxException;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOCase;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.filefilter.FalseFileFilter;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;

//import static com.google.common.base.Preconditions.*;
/**
 * Command line tool that extracts features from directories of images.
 *
 * The extracted features are written to an output file (or to stdout).
 *
 * @author Franz
 */
public class Extractor {

    private static final Logger log = Logger.getLogger(Extractor.class);
    /**
     * Timeout used for the thread pool. Just set it to a large enough value so that all threads will terminate.
     */
    private static final int TERMINATION_TIMEOUT = 100; // days

    /**
     * Size of the fixed thread pool that runs the extraction tasks.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "--threads", usage = "amount of threads (defaults to amount of available processors)")
    private int threads = Runtime.getRuntime().availableProcessors();
    /**
     * Directory that is scanned for images. No default value is assigned here; validateInput() rejects a missing
     * directory.
     */
    @Option(name = "-d", aliases = {
            "--src-dir" }, usage = "directory containing images (default: execution directory)")
    File imageDirectory;
    /**
     * If set, sub directories of the image and mask directories are scanned as well.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "-r", usage = "recursively descend into directories (default: no)")
    private boolean recursive = false;
    /**
     * Path of the output file, "-" selects stdout.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "-o", aliases = { "--output-dir",
            "--output" }, usage = "output to this file (default: features.csv, - for stdout)")
    private String outFile = "features.csv";
    /**
     * Optional directory containing mask files, null if masking is not used.
     */
    @Option(name = "-m", aliases = { "--masks-dir" }, usage = "directory containing masks")
    File maskDirectory = null;
    /**
     * Passed to the FileWriter: true appends to the output file, false overwrites it.
     */
    @Option(name = "--append", usage = "append to output file (default: false = overwrite)")
    private boolean append;
    /**
     * If false, a header line is written once. writeOutput() sets this flag to true after writing the header.
     */
    @Option(name = "-nh", usage = "omit headerline")
    private boolean omitHeader = false;
    /**
     * Simple class name of the descriptor, resolved relative to the package of FeatureDescriptor.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "-D", aliases = {
            "--descriptor" }, usage = "Use this feature descriptor (e.g. Sift). The string "
                    + "specified here must be one of the classes in de.lmu.ifi.dbs.jfeaturelib.features. If in doubt, "
                    + "--list-capabilities can be used to get an overview.")
    private String descriptor = null;
    /**
     * Optional label that is written as the first column of every output line.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "-c", usage = "image class that should be written to the output file")
    private String imageClass = null;
    /**
     * Command flag: print the capabilities of all descriptors and exit.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "--list-capabilities", usage = "list the registered FeatureDescriptors and the output of their supports() method")
    private boolean listCapabilities = false;
    /**
     * Command flag: print the help screen and exit.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "--help", usage = "show this screen")
    private boolean showHelp = false;
    /**
     * If set, main() switches the log level to DEBUG.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "-v", usage = "show JFeatureLib debug messages")
    private boolean debugJFeatureLib = false;
    /**
     * Command flag: copy the bundled properties files into the current directory and exit.
     */
    @SuppressWarnings("FieldMayBeFinal")
    @Option(name = "--unpack-properties", usage = "extracts the default properties and logging properties into the current directory")
    private boolean unpackProperties = false;
    // other command line parameters than options
    // @Argument
    // private List<String> arguments = new ArrayList<>();
    private static final int WRITE_BUFFER = 1024 * 1024; // bytes
    private static final String NL = "\n";
    // library configuration, also handed to every descriptor instance
    private final LibProperties properties;
    // file suffixes that are treated as images
    private final String[] imageFormats;
    // column separator of the output
    private final String separator = ", ";
    // number of feature lines written so far
    private int lineCounter = 0;
    // file exists and has a length > 0
    private boolean fileExists;
    // the descriptor to use
    private Class<?> descriptorClazz;
    private Writer writer;
    private ExecutorService pool;

    /**
     * Command line entry point.
     *
     * Parses the arguments into a fresh Extractor, handles the commands that only print or unpack something
     * (help, capability listing, property unpacking) and otherwise validates the input and starts the extraction.
     * Parse and validation errors print the help screen plus the error message and end the JVM with exit code 1.
     *
     * @param args command line arguments; an empty array is treated like "--help"
     * @throws Exception IO, instantiation, access and URI syntax exceptions are logged and rethrown
     */
    public static void main(String[] args) throws Exception {
        try {
            Extractor extractor = new Extractor();
            CmdLineParser cli = new CmdLineParser(extractor);
            cli.setUsageWidth(100);

            try {
                // an empty command line is treated like an explicit request for the help screen
                String[] effectiveArgs = args.length == 0 ? new String[] { "--help" } : args;
                cli.parseArgument(effectiveArgs);
            } catch (CmdLineException parseFailure) {
                log.warn(parseFailure);
                printError(cli, parseFailure);
                System.exit(1);
            }

            // raise the log level if requested on the command line
            if (extractor.debugJFeatureLib) {
                LogManager.getRootLogger().setLevel(Level.DEBUG);
                Logger.getLogger("de.lmu").setLevel(Level.DEBUG);
            }

            // commands that never start an extraction
            if (extractor.showHelp) {
                printHelp(cli);
                System.exit(0);
                return;
            }
            if (extractor.listCapabilities) {
                extractor.listFeatureDescriptorCapabilities();
                System.exit(0);
                return;
            }
            if (extractor.unpackProperties) {
                extractor.unpackProperties();
                System.exit(0);
                return;
            }

            // regular run: validate the input first ...
            try {
                extractor.validateInput();
            } catch (Throwable invalid) {
                log.warn("input validation failed.", invalid);
                printError(cli, invalid);
                System.exit(1);
            }

            // ... and start the extraction
            extractor.process();
        } catch (IOException | InstantiationException | IllegalAccessException | URISyntaxException t) {
            log.warn("Uncaught Exception: ", t);
            throw t;
        }
    }

    /**
     * Prints the help screen to stdout, followed by the message of the given throwable and a separator line on
     * stderr.
     *
     * @param parser parser used to render the usage information
     * @param throwable the error whose message is shown
     * @throws IOException if the help screen cannot be printed
     */
    private static void printError(CmdLineParser parser, Throwable throwable) throws IOException {
        printHelp(parser);

        String message = "Message: " + throwable.getMessage();
        String rule = "----------------------------------------------------------";
        System.err.println(message);
        System.err.println(rule);
    }

    /**
     * Prints the help screen to stdout: a short intro, the content of the /cite.txt resource and the usage
     * information of all command line options.
     *
     * @param parser parser used to render the usage information
     * @throws IOException if the citation resource cannot be read
     */
    private static void printHelp(CmdLineParser parser) throws IOException {
        String[] intro = {
            "----------------------------------------------------------",
            "",
            "The extractor utility can be started with",
            "  java -jar JFeatureLib-x.y.z-SNAPSHOT-jar-with-dependencies.jar --help",
            "----------------------------------------------------------"
        };
        for (String line : intro) {
            System.out.println(line);
        }

        // citation hints shipped with the library
        System.out.println(IOUtils.toString(Extractor.class.getResource("/cite.txt")));
        System.out.println("----------------------------------------------------------");

        // option overview generated by args4j
        parser.printUsage(System.out);
        System.out.println("----------------------------------------------------------");
    }

    /**
     * Copies the bundled library properties and the bundled log4j.properties from the classpath into the current
     * execution directory.
     *
     * Failures are logged and not propagated. If the first file cannot be written, the second one is not attempted.
     */
    private void unpackProperties() {
        try {
            copyResource("/" + LibProperties.BASE_FILE.getName(), LibProperties.BASE_FILE);
            log.info("wrote jfeaturelib.properties");

            copyResource("/log4j.properties", new File("log4j.properties"));
            log.info("wrote log4j.properties");
        } catch (IOException ex) {
            log.warn("The properties could not be extracted. Please see the log for more information.", ex);
        }
    }

    /**
     * Copies a single classpath resource into the given file, replacing existing content.
     *
     * @param resource absolute resource name, resolved relative to LibProperties
     * @param target file to write
     * @throws IOException if the resource does not exist or cannot be copied
     */
    private static void copyResource(String resource, File target) throws IOException {
        try (InputStream is = LibProperties.class.getResourceAsStream(resource)) {
            // check before opening the target so that a missing resource does not truncate an existing file
            if (is == null) {
                throw new IOException("resource " + resource + " could not be found on the classpath");
            }
            try (FileOutputStream out = new FileOutputStream(target);
                    FileChannel dst = out.getChannel()) {
                dst.transferFrom(Channels.newChannel(is), 0, Integer.MAX_VALUE);
            }
        }
    }

    /**
     * Prints one line per feature descriptor found in the package of FeatureDescriptor to System.out. Each line
     * consists of the class name (without the package prefix, padded to a common width) and the result of the
     * descriptor's supports() method. The lines are sorted by class name.
     *
     * @throws InstantiationException if a descriptor cannot be instantiated
     * @throws IllegalAccessException if a descriptor constructor is not accessible
     * @throws IOException declared for the package scan
     * @throws URISyntaxException declared for the package scan
     */
    private void listFeatureDescriptorCapabilities()
            throws InstantiationException, IllegalAccessException, IOException, URISyntaxException {
        Package descriptorPackage = FeatureDescriptor.class.getPackage();
        // length of "<package name>." which is cut off from every class name
        int prefixLength = descriptorPackage.getName().length() + 1;

        // collect the descriptors and order them by class name
        List<Class<FeatureDescriptor>> descriptors = new PackageScanner<FeatureDescriptor>()
                .scanForClass(descriptorPackage, FeatureDescriptor.class);
        Collections.sort(descriptors, new ClassNameComparator());

        // width of the name column = longest short name
        int columnWidth = 0;
        for (Class<FeatureDescriptor> clazz : descriptors) {
            int shortNameLength = clazz.getName().length() - prefixLength;
            columnWidth = Math.max(shortNameLength, columnWidth);
        }

        // render the table
        StringBuilder table = new StringBuilder();
        for (Class<FeatureDescriptor> clazz : descriptors) {
            String shortName = clazz.getName().substring(prefixLength);
            String capabilities = clazz.newInstance().supports().toString();
            table.append(StringUtils.rightPad(shortName, columnWidth))
                    .append(" : ")
                    .append(capabilities)
                    .append("\n");
        }
        System.out.println(table);
    }

    /**
     * Package private constructor, intended for this class and for test classes only. Loads the library properties
     * and derives the list of image formats from them.
     *
     * @throws IOException declared for loading the properties
     */
    Extractor() throws IOException {
        this.properties = LibProperties.get();
        this.imageFormats = initImageFormats(this.properties);
    }

    /**
     * Reads the comma separated list of image formats from the properties.
     *
     * @param libProperties properties holding the LibProperties.IMAGE_FORMATS entry
     * @return the individual formats with surrounding whitespace removed
     */
    private String[] initImageFormats(LibProperties libProperties) {
        String configured = libProperties.getString(LibProperties.IMAGE_FORMATS);
        String[] tokens = configured.split(" *, *");

        String[] trimmed = new String[tokens.length];
        int index = 0;
        for (String token : tokens) {
            trimmed[index++] = token.trim();
        }
        return trimmed;
    }

    /**
     * Runs the extraction: validates the input, pairs images with masks, opens the output writer and the thread
     * pool, submits one task per image and waits for all tasks to finish.
     *
     * The pool is shut down and the writer is closed even if an intermediate step fails, so that output which
     * was already buffered is flushed and no worker threads are left behind.
     */
    private void process() {
        // validate again: this method may be called without a preceding validation
        validateInput();

        log.debug("creating mask & file list");
        Collection<File> maskList = createFileList(maskDirectory);
        Collection<File> imageList = createFileList(imageDirectory);
        HashMap<File, File> tuples = findTuples(imageList, maskList);

        openWriter();
        try {
            openPool();
            try {
                processImages(tuples);
            } finally {
                // waits for all submitted tasks before the writer is closed below
                closePool();
            }
        } finally {
            closeWriter();
        }
    }

    /**
     * Validates the command line parameters and prepares the state derived from them.
     *
     * Resolves the descriptor class, disables masking if the descriptor does not support it, checks that the image
     * and mask directories are readable, creates the output file if necessary (unless stdout is selected) and
     * checks the format of the image class.
     *
     * @throws NullPointerException if no descriptor was specified
     * @throws IllegalArgumentException if any other parameter is invalid; the underlying exception, if any, is
     * attached as cause
     */
    private void validateInput() throws IllegalArgumentException {
        log.debug("validating");
        if (descriptor == null) {
            throw new NullPointerException("descriptor must not be null");
        }

        try { // check if the descriptor class is valid
            String base = FeatureDescriptor.class.getPackage().getName();
            descriptorClazz = Class.forName(base + "." + descriptor);
            if (!FeatureDescriptor.class.isAssignableFrom(descriptorClazz)) {
                throw new IllegalArgumentException("The class must derive from FeatureDescriptor");
            }

            // check if masking is required and supported
            FeatureDescriptor fd = (FeatureDescriptor) descriptorClazz.newInstance();
            boolean supportsMasking = fd.supports().contains(Supports.Masking);
            if (maskDirectory != null && !supportsMasking) {
                log.warn(
                        "A masking directory is set but the chosen descriptor does NOT support masking. Masking will be ignored!");
                maskDirectory = null;
            }
        } catch (ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
            log.warn(ex.getMessage(), ex);
            throw new IllegalArgumentException("the descriptor class does not exist or cannot be created", ex);
        }

        // can the image directory be accessed
        if (imageDirectory == null || !imageDirectory.isDirectory() || !imageDirectory.canRead()) {
            throw new IllegalArgumentException("the source directory cannot be read or does not exist");
        }

        // can the mask directory be accessed
        if (maskDirectory != null && (!maskDirectory.isDirectory() || !maskDirectory.canRead())) {
            throw new IllegalArgumentException("the mask directory cannot be read or does not exist");
        }

        // can the output file be written?
        if (outFile == null) {
            throw new IllegalArgumentException("the output file is not valid");
        }
        // further check the file if it is not stdout
        if (!outFile.equals("-")) {
            File f = new File(outFile);
            if (f.exists() && !f.canWrite()) {
                throw new IllegalArgumentException("the output file is not valid or not writable");
            }
            try { // create the output file or fail
                f.createNewFile();
                // remember whether there is already content in the file
                fileExists = (f.exists() && f.length() > 0);
            } catch (IOException ex) {
                log.warn(ex.getMessage(), ex);
                throw new IllegalArgumentException("the output file could not be created", ex);
            }
        }

        // check if an image class is set and valid
        if (imageClass != null && !imageClass.matches("^\\w+$")) {
            throw new IllegalArgumentException(
                    "the image class must only contain word characters and not whitespace");
        }
    }

    /**
     * Creates a list of image files in the specified directory and all subdirectories (if recursive is enabled).
     * A file counts as image if its name ends with one of the configured image formats (case insensitive).
     *
     * @param dir directory to start from, may be null
     * @return the image files in this directory (possibly empty, never null)
     */
    Collection<File> createFileList(File dir) {
        if (dir == null) {
            log.debug("directory is null, returning empty list");
            // type safe replacement for the raw Collections.EMPTY_LIST constant
            return Collections.emptyList();
        }

        SuffixFileFilter sff = new SuffixFileFilter(imageFormats, IOCase.INSENSITIVE);
        IOFileFilter recursiveFilter = recursive ? TrueFileFilter.INSTANCE : FalseFileFilter.INSTANCE;
        return FileUtils.listFiles(dir, sff, recursiveFilter);
    }

    /**
     * Opens the buffered writer which is used to write the output. The writer targets System.out if the output
     * file is "-", otherwise the output file (appending or overwriting, depending on the append option).
     *
     * @throws IllegalStateException if the output file cannot be opened; the IOException is attached as cause
     */
    private void openWriter() {
        log.debug("open writer");
        try {
            if (outFile.equals("-")) {
                writer = new BufferedWriter(new OutputStreamWriter(System.out), WRITE_BUFFER);
            } else {
                writer = new BufferedWriter(new FileWriter(outFile, append), WRITE_BUFFER);
            }
        } catch (IOException ex) {
            log.warn(ex.getMessage(), ex);
            throw new IllegalStateException("could not open output file for writing", ex);
        }
    }

    /**
     * Closes the output writer. Does nothing if the writer was never opened.
     *
     * @throws IllegalStateException if closing fails; the IOException is attached as cause
     */
    private void closeWriter() {
        log.debug("close writer");
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException ex) {
            log.warn(ex.getMessage(), ex);
            throw new IllegalStateException("could not close output file", ex);
        }
    }

    /**
     * Hands one ExtractionTask per image to the thread pool.
     *
     * @param tuples maps each image file to its mask file (the mask may be null)
     */
    private void processImages(HashMap<File, File> tuples) {
        log.debug("process images");
        for (File image : tuples.keySet()) {
            File mask = tuples.get(image);
            ExtractionTask task = new ExtractionTask(image, mask);
            pool.submit(task);
        }
    }

    /**
     * Sets up the fixed size thread pool that executes the extraction tasks. The pool size is taken from the
     * threads option.
     */
    private void openPool() {
        log.debug("open pool");
        this.pool = Executors.newFixedThreadPool(this.threads);
    }

    /**
     * Shuts the thread pool down and waits until all submitted tasks have finished (at most TERMINATION_TIMEOUT
     * days). A warning is logged if the pool did not terminate within that time.
     *
     * @throws IllegalStateException if the waiting thread is interrupted; the interrupt flag is restored and the
     * InterruptedException is attached as cause
     */
    private void closePool() {
        log.debug("close pool");
        try {
            pool.shutdown();
            if (!pool.awaitTermination(TERMINATION_TIMEOUT, TimeUnit.DAYS)) {
                log.warn("thread pool did not terminate within " + TERMINATION_TIMEOUT + " days");
            }
        } catch (InterruptedException ex) {
            // keep the interrupt visible for callers further up the stack
            Thread.currentThread().interrupt();
            log.warn(ex.getMessage(), ex);
            throw new IllegalStateException("error while shutting down pool", ex);
        }
    }

    /**
     * Synchronized method to write all features that were extracted from the given file to the output writer.
     * It is called by the extraction tasks, which run in the threads of the pool.
     *
     * Each feature becomes one line: optional image class, quoted file name, feature values. Lines are separated
     * by a newline that is written in front of every line but the first, so the output does not end with a newline.
     * The first call also writes the header line unless the header is omitted.
     *
     * Nothing is written for a null or empty feature list. In particular the header is not consumed, because its
     * column count is taken from the first feature.
     *
     * @param file image file the features belong to
     * @param features extracted features, one array per output line
     * @throws IOException if writing fails
     */
    private synchronized void writeOutput(File file, List<double[]> features) throws IOException {
        if (features == null || features.isEmpty()) {
            log.debug("no features to write for " + file.getName());
            return;
        }

        // we are appending to an existing file. so start with a new line.
        // In overwrite mode the old content is discarded, so no leading line break must be written.
        if (lineCounter == 0 && fileExists && append) {
            writer.append(NL);
        }

        // write head?
        if (!omitHeader) {
            omitHeader = true;
            if (imageClass != null) {
                writer.append("class" + separator);
            }
            writer.append("filename");
            // one numbered column per feature dimension
            for (int i = 0; i < features.get(0).length; i++) {
                writer.append(separator + i);
            }
            writer.append(NL);
        }

        // write one line for each feature
        for (double[] feature : features) {
            // a second line is being written. Thus prepend a new line
            if (lineCounter++ > 0) {
                writer.append(NL);
            }
            // prepend image class (if given)
            if (imageClass != null) {
                writer.append(imageClass).append(separator);
            }

            // write file name
            writer.append('"').append(file.getName()).append('"').append(separator);

            // serialize the feature values
            writer.append(Arrays2.join(feature, separator));
        }
    }

    /**
     * Try to find and map image files and mask files together.
     *
     * Thereby the relative path (starting from imageDirectory and maskDirectory) must be equal. A different file suffix
     * is allowed. Thus, an image [imageDirectory]/classA/car.jpg can have a mask file [maskDirectory]/classA/car.png
     *
     * The comparison is done on the relative paths without the image suffix, so car.jpg is not paired with
     * carpet.png. If several masks match an image, the first one in iteration order of maskList is used.
     * Images without a mask are mapped to null and a warning is logged (only if a mask directory is set).
     *
     * @param imageList image files located below imageDirectory
     * @param maskList mask files located below maskDirectory
     * @return map of image to mask, containing every image as key
     */
    HashMap<File, File> findTuples(Collection<File> imageList, Collection<File> maskList) {
        assert imageDirectory != null : "image Directory must not be null";

        HashMap<File, File> map = new HashMap<>(imageList.size());

        // only search for masks - well - if a mask directory was set
        boolean useMasks = maskDirectory != null && maskDirectory.exists();

        // index the masks by their suffix-less relative path so each image needs a single lookup
        Map<String, File> masksByKey = new HashMap<>();
        if (useMasks) {
            String maskBasePath = maskDirectory.getAbsolutePath();
            for (File maskFile : maskList) {
                String key = stripImageSuffix(relativePath(maskBasePath, maskFile));
                if (!masksByKey.containsKey(key)) {
                    masksByKey.put(key, maskFile);
                }
            }
        }

        String imageBasePath = imageDirectory.getAbsolutePath();
        for (File imageFile : imageList) {
            File correspondingMask = null;

            if (useMasks) {
                // [imageDirectory]/foo/bar/image.jpeg -> foo/bar/image
                String key = stripImageSuffix(relativePath(imageBasePath, imageFile));
                correspondingMask = masksByKey.get(key);

                if (correspondingMask == null) {
                    log.warn("no mask file found for " + imageFile.getAbsolutePath());
                }
            }

            map.put(imageFile, correspondingMask);
        }

        return map;
    }

    /**
     * Returns the absolute path of the file relative to the base path, without a leading separator. The absolute
     * path is returned unchanged if the file is not located below the base path.
     */
    private static String relativePath(String basePath, File file) {
        String path = file.getAbsolutePath();
        if (!path.startsWith(basePath)) {
            return path;
        }
        String relative = path.substring(basePath.length());
        return relative.startsWith(File.separator) ? relative.substring(File.separator.length()) : relative;
    }

    /**
     * Removes a trailing ".[format]" from the path if the format is one of the configured image formats. The
     * comparison ignores case, like the file filter used to collect the files.
     */
    private String stripImageSuffix(String path) {
        for (String format : imageFormats) {
            String suffix = format.startsWith(".") ? format : "." + format;
            int start = path.length() - suffix.length();
            if (start >= 0 && path.regionMatches(true, start, suffix, 0, suffix.length())) {
                return path.substring(0, start);
            }
        }
        return path;
    }

    /**
     * This task is used to read image data from disk, extract features and initiate writing the output.
     *
     * It is an inner class because it uses the descriptor class, the properties and the synchronized writeOutput
     * method of the enclosing Extractor. Failures are logged and never propagated, so one broken image does not
     * affect the other tasks.
     */
    class ExtractionTask implements Runnable {

        /**
         * the image file which should be processed
         */
        private final File image;
        /**
         * possibly set mask (may be null)
         */
        private final File mask;

        /**
         * @param image image file to process, must not be null
         * @param mask mask file or null
         * @throws NullPointerException if image is null
         */
        ExtractionTask(File image, File mask) {
            if (image == null) {
                throw new NullPointerException("image must not be null");
            }
            this.image = image;
            this.mask = mask;
        }

        /**
         * Reads the image (and mask), runs a new descriptor instance on it and writes the resulting features.
         */
        @Override
        @SuppressWarnings({ "BroadCatchBlock", "TooBroadCatch" })
        public void run() {
            try {
                long time = System.currentTimeMillis();

                // create some logging output
                if (log.isDebugEnabled()) {
                    String msg = "processing file " + image.getName();
                    if (mask != null) {
                        msg += " using mask: " + mask.getName();
                    }
                    log.debug(msg);
                }

                // read image and mask (if set)
                ImageProcessor processor = getProcessor(image);
                ImageProcessor maskProcessor = getProcessor(mask);
                processor.setMask(maskProcessor);

                // extraction, every task uses its own descriptor instance
                FeatureDescriptor fd = (FeatureDescriptor) descriptorClazz.newInstance();
                fd.setProperties(properties);
                fd.run(processor);
                List<double[]> features = fd.getFeatures();

                // log some stats
                if (log.isDebugEnabled()) {
                    time = System.currentTimeMillis() - time;
                    log.debug("processed " + image.getName() + " in " + time + "ms");
                }

                // synchronously write to file
                writeOutput(image, features);
            } catch (IOException | InstantiationException | IllegalAccessException ex) {
                log.warn(ex.getMessage(), ex);
            } catch (Throwable ex) {
                log.error(ex.getMessage(), ex);
            }
        }

        /**
         * Null safe image reader.
         *
         * @param path to the image file, may be null
         * @return image processor or null if path is null
         * @throws IOException if the file could not be opened as an image
         */
        private ImageProcessor getProcessor(File path) throws IOException {
            if (path == null) {
                return null;
            }
            ImagePlus iplus = new Opener().openImage(path.getAbsolutePath());
            // the opener yields null instead of throwing if the file cannot be read as image
            if (iplus == null) {
                throw new IOException("could not open image file " + path.getAbsolutePath());
            }
            return iplus.getProcessor();
        }
    }

    /**
     * Compares classes according to their fully qualified class name (case sensitive). The comparator accepts
     * classes of any type, so it can be used to sort lists of arbitrary Class objects.
     */
    private static class ClassNameComparator implements Comparator<Class<?>> {

        @Override
        public int compare(Class<?> o1, Class<?> o2) {
            return o1.getName().compareTo(o2.getName());
        }
    }
}