org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator.java

Source

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
package org.apache.commons.compress.archivers.zip;

import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.Deflater;

import static java.util.Collections.synchronizedList;
import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest;

/**
 * Creates a zip in parallel by using multiple threadlocal {@link ScatterZipOutputStream} instances.
 * <p>
 * Note that this class generally makes no guarantees about the order of things written to
 * the output file. Things that need to come in a specific order (manifests, directories)
 * must be handled by the client of this class, usually by writing these things to the
 * {@link ZipArchiveOutputStream} <em>before</em> calling {@link #writeTo writeTo} on this class.</p>
 * <p>
 * The client can supply an {@link java.util.concurrent.ExecutorService}, but for reasons of
 * memory model consistency, this will be shut down by this class prior to completion.
 * </p>
 * @since 1.10
 */
public class ParallelScatterZipCreator {
    /** Every scatter stream created so far; worker threads append to it when they create their stream. */
    private final List<ScatterZipOutputStream> streams = synchronizedList(new ArrayList<ScatterZipOutputStream>());
    private final ExecutorService es;
    private final ScatterGatherBackingStoreSupplier backingStoreSupplier;
    /** Futures of all submitted callables, checked by {@link #writeTo writeTo}. */
    private final List<Future<Object>> futures = new ArrayList<>();

    private final long startedAt = System.currentTimeMillis();
    private long compressionDoneAt = 0;
    private long scatterDoneAt;

    /**
     * Backing store supplier used when the client does not provide one: every call creates a new
     * temporary file and wraps it in a {@link FileBasedScatterGatherBackingStore}.
     */
    private static class DefaultBackingStoreSupplier implements ScatterGatherBackingStoreSupplier {
        /** Counter used to give each temporary file a distinct suffix. */
        final AtomicInteger storeNum = new AtomicInteger(0);

        @Override
        public ScatterGatherBackingStore get() throws IOException {
            final File tempFile = File.createTempFile("parallelscatter", "n" + storeNum.incrementAndGet());
            return new FileBasedScatterGatherBackingStore(tempFile);
        }
    }

    /**
     * Creates a {@link ScatterZipOutputStream} on top of a backing store obtained from the given supplier,
     * compressing with {@link Deflater#DEFAULT_COMPRESSION}.
     *
     * @param scatterGatherBackingStoreSupplier the supplier asked for a new backing store
     * @return the new scatter stream
     * @throws IOException if the supplier fails to provide a backing store
     */
    private ScatterZipOutputStream createDeferred(
            final ScatterGatherBackingStoreSupplier scatterGatherBackingStoreSupplier) throws IOException {
        final ScatterGatherBackingStore bs = scatterGatherBackingStoreSupplier.get();
        // lifecycle is bound to the ScatterZipOutputStream returned
        final StreamCompressor sc = StreamCompressor.create(Deflater.DEFAULT_COMPRESSION, bs); //NOSONAR
        return new ScatterZipOutputStream(bs, sc);
    }

    /**
     * One scatter stream per thread that runs a callable. The stream is created on first use and
     * registered in {@link #streams} so that {@link #writeTo writeTo} can find it later.
     */
    private final ThreadLocal<ScatterZipOutputStream> tlScatterStreams = new ThreadLocal<ScatterZipOutputStream>() {
        @Override
        protected ScatterZipOutputStream initialValue() {
            try {
                final ScatterZipOutputStream scatterStream = createDeferred(backingStoreSupplier);
                streams.add(scatterStream);
                return scatterStream;
            } catch (final IOException e) {
                // initialValue() cannot throw checked exceptions; keep the cause for the caller
                throw new RuntimeException(e); //NOSONAR
            }
        }
    };

    /**
     * Create a ParallelScatterZipCreator with default threads, which is set to the number of available
     * processors, as defined by {@link java.lang.Runtime#availableProcessors}
     */
    public ParallelScatterZipCreator() {
        this(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
    }

    /**
     * Create a ParallelScatterZipCreator
     *
     * @param executorService The executorService to use for parallel scheduling. For technical reasons,
     *                        this will be shut down by this class.
     */
    public ParallelScatterZipCreator(final ExecutorService executorService) {
        this(executorService, new DefaultBackingStoreSupplier());
    }

    /**
     * Create a ParallelScatterZipCreator
     *
     * @param executorService The executorService to use. For technical reasons, this will be shut down
     *                        by this class.
     * @param backingStoreSupplier The supplier of backing store which shall be used
     */
    public ParallelScatterZipCreator(final ExecutorService executorService,
            final ScatterGatherBackingStoreSupplier backingStoreSupplier) {
        this.backingStoreSupplier = backingStoreSupplier;
        es = executorService;
    }

    /**
     * Adds an archive entry to this archive.
     * <p>
     * This method is expected to be called from a single client thread
     * </p>
     *
     * @param zipArchiveEntry The entry to add.
     * @param source          The source input stream supplier
     */
    public void addArchiveEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
        submit(createCallable(zipArchiveEntry, source));
    }

    /**
     * Adds an archive entry to this archive.
     * <p>
     * This method is expected to be called from a single client thread
     * </p>
     *
     * @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
     * @since 1.13
     */
    public void addArchiveEntry(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
        submit(createCallable(zipArchiveEntryRequestSupplier));
    }

    /**
     * Submit a callable for compression.
     *
     * @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
     *
     * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
     */
    public final void submit(final Callable<Object> callable) {
        futures.add(es.submit(callable));
    }

    /**
     * Create a callable that will compress the given archive entry.
     *
     * <p>This method is expected to be called from a single client thread.</p>
     *
     * Consider using {@link #addArchiveEntry addArchiveEntry}, which wraps this method and {@link #submit submit}.
     * The most common use case for using {@link #createCallable createCallable} and {@link #submit submit} from a
     * client is if you want to wrap the callable in something that can be prioritized by the supplied
     * {@link ExecutorService}, for instance to process large or slow files first.
     * Since the creation of the {@link ExecutorService} is handled by the client, all of this is up to the client.
     *
     * @param zipArchiveEntry The entry to add.
     * @param source          The source input stream supplier
     * @return A callable that should subsequently be passed to #submit, possibly in a wrapped/adapted form. The
     * value of this callable is not used, but any exceptions happening inside the compression
     * will be propagated through the callable.
     * @throws IllegalArgumentException if no compression method has been set on the entry
     */
    public final Callable<Object> createCallable(final ZipArchiveEntry zipArchiveEntry,
            final InputStreamSupplier source) {
        final int method = zipArchiveEntry.getMethod();
        if (method == ZipMethod.UNKNOWN_CODE) {
            throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry);
        }
        final ZipArchiveEntryRequest zipArchiveEntryRequest = createZipArchiveEntryRequest(zipArchiveEntry, source);
        return new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                // runs on an executor thread, so this picks that thread's own scatter stream
                tlScatterStreams.get().addArchiveEntry(zipArchiveEntryRequest);
                return null;
            }
        };
    }

    /**
     * Create a callable that will compress archive entry supplied by {@link ZipArchiveEntryRequestSupplier}.
     *
     * <p>This method is expected to be called from a single client thread.</p>
     *
     * The same as {@link #createCallable(ZipArchiveEntry, InputStreamSupplier)}, but the archive entry
     * to be added is supplied by a {@link ZipArchiveEntryRequestSupplier}.
     *
     * @see #createCallable(ZipArchiveEntry, InputStreamSupplier)
     *
     * @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
     * @return A callable that should subsequently be passed to #submit, possibly in a wrapped/adapted form. The
     * value of this callable is not used, but any exceptions happening inside the compression
     * will be propagated through the callable.
     * @since 1.13
     */
    public final Callable<Object> createCallable(
            final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
        return new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                // the request is only resolved when the callable actually runs
                tlScatterStreams.get().addArchiveEntry(zipArchiveEntryRequestSupplier.get());
                return null;
            }
        };
    }

    /**
     * Write the contents of this to the target {@link ZipArchiveOutputStream}.
     * <p>
     * It may be beneficial to write things like directories and manifest files to the targetStream
     * before calling this method.
     * </p>
     * <p>
     * The executor is shut down by this method whether or not the parallel phase succeeded. Once the
     * parallel phase has succeeded, every scatter stream is closed, even if merging one of them fails.
     * If the parallel phase itself fails, the scatter streams are not closed here, because tasks
     * that are still running may be using them.
     * </p>
     *
     * @param targetStream The {@link ZipArchiveOutputStream} to receive the contents of the scatter streams
     * @throws IOException          If writing fails
     * @throws InterruptedException If we get interrupted
     * @throws ExecutionException   If something happens in the parallel execution
     */
    public void writeTo(final ZipArchiveOutputStream targetStream)
            throws IOException, InterruptedException, ExecutionException {

        try {
            // Make sure we catch any exceptions from parallel phase
            for (final Future<?> future : futures) {
                future.get();
            }
        } finally {
            // Must also happen when a task failed or we were interrupted, otherwise the executor
            // would never be shut down.
            es.shutdown();
        }

        es.awaitTermination(1000 * 60L, TimeUnit.SECONDS); // == Infinity. We really *must* wait for this to complete

        // It is important that all threads terminate before we go on, ensure happens-before relationship
        compressionDoneAt = System.currentTimeMillis();

        mergeAndCloseStreams(targetStream);

        scatterDoneAt = System.currentTimeMillis();
    }

    /**
     * Copies every scatter stream to the target and closes each scatter stream exactly once.
     * <p>
     * After the first {@link IOException} no further stream is written, but the remaining streams
     * are still closed so that their backing stores are released. The first failure is rethrown at
     * the end; later close failures are attached to it as suppressed exceptions.
     * </p>
     *
     * @param targetStream the stream receiving the merged content
     * @throws IOException if writing to the target or closing a scatter stream fails
     */
    private void mergeAndCloseStreams(final ZipArchiveOutputStream targetStream) throws IOException {
        IOException failure = null;
        for (final ScatterZipOutputStream scatterStream : streams) {
            try {
                if (failure == null) {
                    scatterStream.writeTo(targetStream);
                }
            } catch (final IOException e) {
                failure = e;
            } finally {
                try {
                    scatterStream.close();
                } catch (final IOException e) {
                    if (failure == null) {
                        failure = e;
                    } else {
                        failure.addSuppressed(e);
                    }
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Returns a message describing the overall statistics of the compression run.
     * <p>
     * The timestamps it is built from are recorded by {@link #writeTo writeTo}, so the result is only
     * meaningful after that method has completed.
     * </p>
     *
     * @return A {@link ScatterStatistics} holding the time spent compressing and the time spent merging
     */
    public ScatterStatistics getStatisticsMessage() {
        return new ScatterStatistics(compressionDoneAt - startedAt, scatterDoneAt - compressionDoneAt);
    }
}