org.codice.ddf.commands.catalog.IngestCommand.java Source code

Java tutorial

Introduction

Here is the source code for org.codice.ddf.commands.catalog.IngestCommand.java

Source

/**
 * Copyright (c) Codice Foundation
 * <p>
 * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
 * General Public License as published by the Free Software Foundation, either version 3 of the
 * License, or any later version.
 * <p>
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details. A copy of the GNU Lesser General Public License
 * is distributed along with this program and can be found at
 * <http://www.gnu.org/licenses/lgpl.html>.
 */
package org.codice.ddf.commands.catalog;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;

import org.apache.commons.lang.StringUtils;
import org.apache.felix.gogo.commands.Argument;
import org.apache.felix.gogo.commands.Command;
import org.apache.felix.gogo.commands.Option;
import org.codice.ddf.commands.catalog.facade.CatalogFacade;
import org.codice.ddf.platform.util.Exceptions;
import org.fusesource.jansi.Ansi;
import org.joda.time.Period;
import org.joda.time.format.PeriodFormatter;
import org.joda.time.format.PeriodFormatterBuilder;
import org.osgi.framework.BundleContext;
import org.osgi.framework.InvalidSyntaxException;
import org.osgi.framework.ServiceReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ddf.catalog.Constants;
import ddf.catalog.data.Metacard;
import ddf.catalog.data.impl.AttributeImpl;
import ddf.catalog.operation.CreateRequest;
import ddf.catalog.operation.CreateResponse;
import ddf.catalog.operation.impl.CreateRequestImpl;
import ddf.catalog.source.IngestException;
import ddf.catalog.source.SourceUnavailableException;
import ddf.catalog.transform.CatalogTransformerException;
import ddf.catalog.transform.InputTransformer;

/**
 * Custom Karaf command for ingesting records into the Catalog.
 */
@Command(scope = CatalogCommands.NAMESPACE, name = "ingest", description = "Ingests Metacards into the Catalog.")
public class IngestCommand extends CatalogCommands {

    private static final Logger LOGGER = LoggerFactory.getLogger(IngestCommand.class);

    private static final Logger INGEST_LOGGER = LoggerFactory.getLogger(Constants.INGEST_LOGGER_NAME);

    private static final int DEFAULT_BATCH_SIZE = 500;

    private final PeriodFormatter timeFormatter = new PeriodFormatterBuilder().printZeroRarelyLast().appendDays()
            .appendSuffix(" day", " days").appendSeparator(" ").appendHours().appendSuffix(" hour", " hours")
            .appendSeparator(" ").appendMinutes().appendSuffix(" minute", " minutes").appendSeparator(" ")
            .appendSeconds().appendSuffix(" second", " seconds").toFormatter();

    private final AtomicInteger ingestCount = new AtomicInteger();

    private final AtomicInteger ignoreCount = new AtomicInteger();

    private final AtomicBoolean doneBuildingQueue = new AtomicBoolean();

    private final AtomicInteger processingThreads = new AtomicInteger();

    private final AtomicInteger fileCount = new AtomicInteger(Integer.MAX_VALUE);

    @Argument(name = "File path or Directory path", description = "File path to a record or a directory of files to be ingested. Paths are absolute and must be in quotes."
            + " This command can only detect roughly 2 billion records in one folder. Individual operating system limits might also apply.", index = 0, multiValued = false, required = true)
    String filePath = null;

    // DDF-535: Remove this argument in ddf-3.0
    @Argument(name = "Batch size", description = "Number of Metacards to ingest at a time. Change this argument based on system memory and catalog provider limits. [DEPRECATED: use --batchsize option instead]", index = 1, multiValued = false, required = false)
    int deprecatedBatchSize = DEFAULT_BATCH_SIZE;

    // DDF-535: remove "Transformer" alias in ddf-3.0
    @Option(name = "--transformer", required = false, aliases = { "-t",
            "Transformer" }, multiValued = false, description = "The metacard transformer ID to use to transform data files into metacards. The default metacard transformer is the Java serialization transformer.")
    String transformerId = DEFAULT_TRANSFORMER_ID;

    // DDF-535: Remove "Multithreaded" alias in ddf-3.0
    @Option(name = "--multithreaded", required = false, aliases = { "-m",
            "Multithreaded" }, multiValued = false, description = "Number of threads to use when ingesting. Setting this value too high for your system can cause performance degradation.")
    int multithreaded = 8;

    // DDF-535: remove "-d" and "Ingest Failure Directory" aliases in ddf-3.0
    @Option(name = "--failedDir", required = false, aliases = { "-d", "-f",
            "Ingest Failure Directory" }, multiValued = false, description = "The directory to put files that failed to ingest.  Using this option will force a batch size of 1.")
    String failedDir = null;

    @Option(name = "--batchsize", required = false, aliases = {
            "-b" }, multiValued = false, description = "Number of Metacards to ingest at a time. Change this argument based on system memory and catalog provider limits.")
    int batchSize = DEFAULT_BATCH_SIZE;

    @Option(name = "--ignore", required = false, aliases = {
            "-i" }, multiValued = true, description = "File extension(s) or file name(s) to ignore during ingestion (-i '.txt' -i 'image.jpg' -i 'file' )")
    List<String> ignoreList;

    File failedIngestDirectory = null;

    InputTransformer transformer = null;

    @Override
    protected Object executeWithSubject() throws Exception {

        final CatalogFacade catalog = getCatalog();
        final File inputFile = new File(filePath);

        if (!inputFile.exists()) {
            printErrorMessage("File or directory [" + filePath + "] must exist.");
            console.println("If the file does indeed exist, try putting the path in quotes.");
            return null;
        }

        if (deprecatedBatchSize != DEFAULT_BATCH_SIZE) {
            // user specified the old style batch size, so use that
            printErrorMessage(
                    "Batch size positional argument is DEPRECATED, please use --batchsize option instead.");
            batchSize = deprecatedBatchSize;
        }

        if (batchSize <= 0) {
            printErrorMessage(
                    "A batch size of [" + batchSize + "] was supplied. Batch size must be greater than 0.");
            return null;
        }

        if (!StringUtils.isEmpty(failedDir)) {
            failedIngestDirectory = new File(failedDir);
            if (!verifyFailedIngestDirectory()) {
                return null;
            }

            /**
             * Batch size is always set to 1, when using an Ingest Failure Directory.  If a batch size is specified by the user, issue 
             * a warning stating that a batch size of 1 will be used.
             */
            if (batchSize != DEFAULT_BATCH_SIZE) {
                console.println("WARNING: An ingest failure directory was supplied in addition to a batch size of "
                        + batchSize
                        + ". When using an ingest failure directory, the batch size must be 1. Setting batch size to 1.");
            }

            batchSize = 1;
        }

        BundleContext bundleContext = getBundleContext();
        if (!DEFAULT_TRANSFORMER_ID.equals(transformerId)) {
            ServiceReference[] refs = null;

            try {
                refs = bundleContext.getServiceReferences(InputTransformer.class.getName(),
                        "(|" + "(" + Constants.SERVICE_ID + "=" + transformerId + ")" + ")");
            } catch (InvalidSyntaxException e) {
                throw new IllegalArgumentException("Invalid transformer transformerId: " + transformerId, e);
            }

            if (refs == null || refs.length == 0) {
                throw new IllegalArgumentException("Transformer " + transformerId + " not found");
            } else {
                transformer = (InputTransformer) bundleContext.getService(refs[0]);
            }
        }

        Stream<Path> ingestStream = Files.walk(inputFile.toPath(), FileVisitOption.FOLLOW_LINKS);

        int totalFiles = (inputFile.isDirectory()) ? inputFile.list().length : 1;
        fileCount.getAndSet(totalFiles);

        final ArrayBlockingQueue<Metacard> metacardQueue = new ArrayBlockingQueue<>(batchSize * multithreaded);

        ExecutorService queueExecutor = Executors.newSingleThreadExecutor();

        final long start = System.currentTimeMillis();

        printProgressAndFlush(start, fileCount.get(), 0);

        queueExecutor.submit(() -> buildQueue(ingestStream, metacardQueue, start));

        final ScheduledExecutorService batchScheduler = Executors.newSingleThreadScheduledExecutor();

        BlockingQueue<Runnable> blockingQueue = new ArrayBlockingQueue<>(multithreaded);
        RejectedExecutionHandler rejectedExecutionHandler = new ThreadPoolExecutor.CallerRunsPolicy();
        ExecutorService executorService = new ThreadPoolExecutor(multithreaded, multithreaded, 0L,
                TimeUnit.MILLISECONDS, blockingQueue, rejectedExecutionHandler);

        submitToCatalog(batchScheduler, executorService, metacardQueue, catalog, start);

        while (!doneBuildingQueue.get() || processingThreads.get() != 0) {
            try {
                TimeUnit.SECONDS.sleep(2);
            } catch (InterruptedException e) {
                LOGGER.error("Ingest 'Waiting for processing to finish' thread interrupted: {}", e);
            }
        }

        try {
            queueExecutor.shutdown();
            executorService.shutdown();
            batchScheduler.shutdown();
        } catch (SecurityException e) {
            LOGGER.error("Executor service shutdown was not permitted: {}", e);
        }

        printProgressAndFlush(start, fileCount.get(), ingestCount.get() + ignoreCount.get());
        long end = System.currentTimeMillis();
        console.println();
        String elapsedTime = timeFormatter.print(new Period(start, end).withMillis(0));

        console.println();
        console.printf(" %d file(s) ingested in %s %n", ingestCount.get(), elapsedTime);

        LOGGER.info("{} file(s) ingested in {} [{} records/sec]", ingestCount.get(), elapsedTime,
                calculateRecordsPerSecond(ingestCount.get(), start, end));
        INGEST_LOGGER.info("{} file(s) ingested in {} [{} records/sec]", ingestCount.get(), elapsedTime,
                calculateRecordsPerSecond(ingestCount.get(), start, end));

        if (fileCount.get() != ingestCount.get()) {
            console.println();
            if ((fileCount.get() - ingestCount.get() - ignoreCount.get()) >= 1) {
                String failedAmount = Integer.toString(fileCount.get() - ingestCount.get() - ignoreCount.get());
                printErrorMessage(
                        failedAmount + " file(s) failed to be ingested.  See the ingest log for more details.");
                INGEST_LOGGER.warn("{} files(s) failed to be ingested.", failedAmount);
            }
            if (ignoreList != null) {
                String ignoredAmount = Integer.toString(ignoreCount.get());
                printColor(Ansi.Color.YELLOW,
                        ignoredAmount + " file(s) ignored.  See the ingest log for more details.");
                INGEST_LOGGER.warn("{} files(s) were ignored.", ignoredAmount);
            }
        }
        console.println();

        return null;
    }

    /**
     * Helper method to build ingest log strings
     */
    private String buildIngestLog(ArrayList<Metacard> metacards) {
        StringBuilder strBuilder = new StringBuilder();

        final String newLine = System.getProperty("line.separator");

        for (int i = 0; i < metacards.size(); i++) {
            Metacard card = metacards.get(i);
            strBuilder.append(newLine).append("Batch #: ").append(i + 1).append(" | ");
            if (card != null) {
                if (card.getTitle() != null) {
                    strBuilder.append("Metacard Title: ").append(card.getTitle()).append(" | ");
                }
                if (card.getId() != null) {
                    strBuilder.append("Metacard ID: ").append(card.getId()).append(" | ");
                }
            } else {
                strBuilder.append("Null Metacard");
            }
        }
        return strBuilder.toString();
    }

    private void logIngestException(IngestException exception, File inputFile) {
        LOGGER.debug("Failed to ingest file [{}].", inputFile.getAbsolutePath(), exception);
        INGEST_LOGGER.warn("Failed to ingest file [{}]:  \n{}", inputFile.getAbsolutePath(),
                Exceptions.getFullMessage(exception));
    }

    private CreateResponse createMetacards(CatalogFacade catalog, List<Metacard> listOfMetaCards)
            throws IngestException, SourceUnavailableException {
        CreateRequest createRequest = new CreateRequestImpl(listOfMetaCards);
        return catalog.create(createRequest);
    }

    private Metacard readMetacard(File file) throws IngestException {
        Metacard result = null;

        FileInputStream fis = null;
        ObjectInputStream ois = null;

        try {
            if (DEFAULT_TRANSFORMER_ID.matches(transformerId)) {
                ois = new ObjectInputStream(new FileInputStream(file));
                result = (Metacard) ois.readObject();
                ois.close();
            } else {
                fis = new FileInputStream(file);
                result = generateMetacard(fis);
                if (StringUtils.isBlank(result.getTitle())) {
                    LOGGER.debug("Metacard title was blank. Setting title to filename.");
                    result.setAttribute(new AttributeImpl(Metacard.TITLE, file.getName()));
                }
                fis.close();
            }
        } catch (IOException | IllegalArgumentException | ClassNotFoundException e) {
            throw new IngestException(e);
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException e1) {
                    console.println(e1);
                }
            }

            if (ois != null) {
                try {
                    ois.close();
                } catch (IOException e2) {
                    console.println(e2);
                }
            }
        }

        return result;
    }

    private Metacard generateMetacard(InputStream message) throws IOException {
        try {
            if (message != null) {
                return transformer.transform(message);
            } else {
                throw new IllegalArgumentException("data file is null.");
            }

        } catch (CatalogTransformerException e) {
            throw new IllegalArgumentException("Transformation Failed for transformer: " + transformerId, e);
        }
    }

    private boolean verifyFailedIngestDirectory() {
        if (!failedIngestDirectory.exists()) {
            makeFailedIngestDirectory();
        }

        if (!failedIngestDirectory.canWrite()) {
            printErrorMessage("Directory [" + failedIngestDirectory.getAbsolutePath() + "] is not writable.");
            return false;
        } else {
            return true;
        }
    }

    private void makeFailedIngestDirectory() {
        if (!failedIngestDirectory.mkdirs()) {
            printErrorMessage("Unable to create directory [" + failedIngestDirectory.getAbsolutePath() + "].");
        }
    }

    private boolean processBatch(CatalogFacade catalog, ArrayList<Metacard> metacards)
            throws SourceUnavailableException {
        CreateResponse createResponse = null;

        try {
            createResponse = createMetacards(catalog, metacards);
        } catch (IngestException e) {
            printErrorMessage("Error executing command: " + e.getMessage());
            if (INGEST_LOGGER.isWarnEnabled()) {
                INGEST_LOGGER.warn("Error ingesting metacard batch {}", buildIngestLog(metacards), e);
            }
        } catch (SourceUnavailableException e) {
            if (INGEST_LOGGER.isWarnEnabled()) {
                INGEST_LOGGER.warn(
                        "Error on process batch, local provider not available. {}"
                                + " metacards failed to ingest. {}",
                        metacards.size(), buildIngestLog(metacards), e);
            }
        } finally {
            processingThreads.decrementAndGet();
        }

        if (createResponse != null) {
            ingestCount.getAndAdd(createResponse.getCreatedMetacards().size());
        }
        return createResponse != null;
    }

    private void moveToFailedIngestDirectory(File source) {
        File destination = new File(failedIngestDirectory.getAbsolutePath() + File.separator + source.getName());

        if (!source.renameTo(destination)) {
            printErrorMessage("Unable to move source file [" + source.getAbsolutePath() + "] to ["
                    + failedIngestDirectory + "].");
        }
    }

    private void buildQueue(Stream<Path> ingestStream, ArrayBlockingQueue<Metacard> metacardQueue, long start) {
        ingestStream.filter(a -> !a.toFile().isDirectory()).forEach(a -> {
            File file = a.toFile();

            if (file.isHidden()) {
                ignoreCount.incrementAndGet();
            } else {
                String extension = file.getName();

                if (extension.contains(".")) {
                    int x = extension.indexOf('.');
                    extension = extension.substring(x);
                }

                if (ignoreList != null && (ignoreList.contains(extension) || ignoreList.contains(file.getName()))) {
                    ignoreCount.incrementAndGet();
                    printProgressAndFlush(start, fileCount.get(), ingestCount.get() + ignoreCount.get());
                } else {
                    Metacard result;
                    try {
                        result = readMetacard(file);
                    } catch (IngestException e) {
                        result = null;
                        logIngestException(e, file);
                        if (failedIngestDirectory != null) {
                            moveToFailedIngestDirectory(file);
                        }
                        printErrorMessage("Failed to ingest file [" + file.getAbsolutePath() + "].");
                        if (INGEST_LOGGER.isWarnEnabled()) {
                            INGEST_LOGGER.warn("Failed to ingest file [{}].", file.getAbsolutePath());
                        }
                    }

                    if (result != null) {
                        try {
                            metacardQueue.put(result);
                        } catch (InterruptedException e) {
                            INGEST_LOGGER.error("Thread interrupted while waiting to 'put' metacard: {}", e);
                        }
                    }
                }
            }
        });
        doneBuildingQueue.set(true);
    }

    private void submitToCatalog(ScheduledExecutorService batchScheduler, ExecutorService executorService,
            ArrayBlockingQueue<Metacard> metacardQueue, CatalogFacade catalog, long start) {

        batchScheduler.scheduleWithFixedDelay(() -> {
            int queueSize = metacardQueue.size();
            if (queueSize > 0) {

                ArrayList<Metacard> metacardBatch = new ArrayList<>(batchSize);

                if (queueSize > batchSize || doneBuildingQueue.get()) {
                    metacardQueue.drainTo(metacardBatch, batchSize);
                    processingThreads.incrementAndGet();
                }

                if (metacardBatch.size() > 0) {
                    executorService.submit(() -> {
                        try {
                            processBatch(catalog, metacardBatch);
                        } catch (SourceUnavailableException e) {
                            if (INGEST_LOGGER.isWarnEnabled()) {
                                INGEST_LOGGER.warn("Error on process batch: {}", e);
                            }
                        }
                    });
                    printProgressAndFlush(start, fileCount.get(), ingestCount.get() + ignoreCount.get());
                }
            }
        }, 100, 100, TimeUnit.MILLISECONDS);
    }
}