com.google.devtools.build.android.ziputils.SplitZip.java Source code

Introduction

Here is the source code for com.google.devtools.build.android.ziputils.SplitZip.java
Source

// Copyright 2015 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.android.ziputils;

import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTCRC;
import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTLEN;
import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTSIZ;
import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENCRC;
import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENLEN;
import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENSIZ;
import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENTIM;
import static com.google.devtools.build.android.ziputils.LocalFileHeader.LOCFLG;
import static com.google.devtools.build.android.ziputils.LocalFileHeader.LOCTIM;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.Sets;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Extracts entries from a set of input archives, and copies them to N output archive of
 * approximately equal size, while attempting to split archives on package (directory) boundaries.
 * Optionally, accept a list of entries to be added to the first output archive, splitting
 * remaining entries by package boundaries.
 */
public class SplitZip implements EntryHandler {
    private boolean verbose = false;
    private boolean splitDexFiles = false;
    private final List<ZipIn> inputs;
    private final List<ZipOut> outputs;
    private String filterFile;
    private InputStream filterInputStream;
    private String resourceFile;
    private Date date;
    private DosTime dosTime;
    // Internal state variables:
    private boolean finished = false;
    private Set<String> filter;
    private ZipOut[] zipOuts;
    private ZipOut resourceOut;
    private final Map<String, ZipOut> assignments = new HashMap<>();
    private final Map<String, CentralDirectory> centralDirectories;
    private final Set<String> classes = new TreeSet<>();
    private Predicate<String> inputFilter = Predicates.alwaysTrue();

    /**
     * Creates an un-configured {@code SplitZip} instance.
     */
    public SplitZip() {
        inputs = new ArrayList<>();
        outputs = new ArrayList<>();
        centralDirectories = new HashMap<>();
    }

    /**
     * Configures a resource file. By default, resources are output in the initial shard.
     * If a resource file is specified, resources are written to this instead.
     * @param resourceFile in not {@code null}, the name of a file in which to output resources.
     * @return this object.
     */
    public SplitZip setResourceFile(String resourceFile) {
        this.resourceFile = resourceFile;
        return this;
    }

    // Package private for testing with mock file
    SplitZip setResourceFile(ZipOut resOut) {
        resourceOut = resOut;
        return this;
    }

    /**
     * Gets the name of the resource output file. If no resource output file is configured, resources
     * are output in the initial shard.
     * @return the name of the resource output file, or {@code null} if no file has been configured.
     */
    public String getResourceFile() {
        return resourceFile;
    }

    /**
     * Configures a file containing a list of files to be included in the first output archive.
     *
     * @param clFile path of class file list.
     * @return this object
     */
    public SplitZip setMainClassListFile(String clFile) {
        filterFile = clFile;
        return this;
    }

    // Package private for testing with mock file
    SplitZip setMainClassListFile(InputStream clInputStream) {
        filterInputStream = clInputStream;
        return this;
    }

    /**
     * Gets the path of the file listing the content of the initial shard.
     * @return return path of file list file, or {@code null} if not set.
     */
    public String getMainClassListFile() {
        return filterFile;
    }

    /**
     * Configures verbose mode.
     *
     * @param flag set to {@code true} to turn on verbose mode.
     * @return this object
     */
    public SplitZip setVerbose(boolean flag) {
        verbose = flag;
        return this;
    }

    /**
     * Gets the verbosity mode.
     * @return {@code true} iff verbose mode is enabled
     */
    public boolean isVerbose() {
        return verbose;
    }

    /**
     * Configures whether to split .dex files along with .class files.
     *
     * @param flag {@code true} will split .dex files; {@code false} treats them as resources
     */
    public SplitZip setSplitDexedClasses(boolean flag) {
        splitDexFiles = flag;
        return this;
    }

    /**
     * Sets date to overwrite timestamp of copied entries. Setting the date to {@code null} means
     * using the date and time information in the input file. Set an explicit date to override.
     *
     * @param date modified date and time to set for entries in output.
     * @return this object.
     */
    public SplitZip setEntryDate(Date date) {
        this.date = date;
        this.dosTime = date == null ? null : new DosTime(date);
        return this;
    }

    /**
     * Sets date to {@link DosTime#DOS_EPOCH}.
     * @return this object.
     */
    public SplitZip useDefaultEntryDate() {
        this.date = DosTime.DOS_EPOCH;
        this.dosTime = DosTime.EPOCH;
        return this;
    }

    /**
     * Gets the entry modified date.
     */
    public Date getEntryDate() {
        return date;
    }

    /**
     * Configures multiple input file locations.
     *
     * @param inputs list of input locations.
     * @return this object
     * @throws java.io.IOException
     */
    public SplitZip addInputs(Iterable<String> inputs) throws IOException {
        for (String i : inputs) {
            addInput(i);
        }
        return this;
    }

    /**
     * Configures an input location. An input file must be a zip archive.
     *
     * @param filename path for an input location.
     * @return this object
     * @throws java.io.IOException
     */
    public SplitZip addInput(String filename) throws IOException {
        if (filename != null) {
            inputs.add(new ZipIn(new FileInputStream(filename).getChannel(), filename));
        }
        return this;
    }

    // Package private, for testing using mock file system.
    SplitZip addInput(ZipIn in) throws IOException {
        Preconditions.checkNotNull(in);
        inputs.add(in);
        return this;
    }

    /**
     * Configures multiple output file locations.
     *
     * @param outputs list of output files.
     * @return this object
     * @throws java.io.IOException
     */
    public SplitZip addOutputs(Iterable<String> outputs) throws IOException {
        for (String o : outputs) {
            addOutput(o);
        }
        return this;
    }

    /**
     * Configures an output location.
     *
     * @param output path for an output location.
     * @return this object
     * @throws java.io.IOException
     */
    public SplitZip addOutput(String output) throws IOException {
        Preconditions.checkNotNull(output);
        outputs.add(new ZipOut(new FileOutputStream(output, false).getChannel(), output));
        return this;
    }

    // Package private for testing with mock file
    SplitZip addOutput(ZipOut output) throws IOException {
        Preconditions.checkNotNull(output);
        outputs.add(output);
        return this;
    }

    /**
     * Set a predicate to only include files with matching filenames in any of the outputs.  <b>Other
     * zip entries are dropped</b>, regardless of whether they're classes or resources and regardless
     * of whether they're listed in {@link #setMainClassListFile}.
     */
    public SplitZip setInputFilter(Predicate<String> inputFilter) {
        this.inputFilter = Preconditions.checkNotNull(inputFilter);
        return this;
    }

    /**
     * Executes this {@code SplitZip}, reading content from the configured input locations, creating
     * the specified number of archives, in the configured output directory.
     *
     * @return this object
     * @throws java.io.IOException
     */
    public SplitZip run() throws IOException {
        verbose("SplitZip: Splitting in: " + outputs.size());
        verbose("SplitZip: with filter: " + filterFile);
        checkConfig();
        // Prepare output files
        zipOuts = outputs.toArray(new ZipOut[outputs.size()]);
        if (resourceFile != null) {
            resourceOut = new ZipOut(new FileOutputStream(resourceFile, false).getChannel(), resourceFile);
        } else if (resourceOut == null) { // may have been set for testing
            resourceOut = zipOuts[0];
        }

        // Read directories of input files
        for (ZipIn zip : inputs) {
            zip.endOfCentralDirectory();
            centralDirectories.put(zip.getFilename(), zip.centralDirectory());
            zip.centralDirectory();
        }
        // Assign input entries to output files
        split();
        // Copy entries to the assigned output files
        for (ZipIn zip : inputs) {
            zip.scanEntries(this);
        }
        return this;
    }

    /**
     * Copies an entry to the assigned output files. Called for each entry in the input files.
     * @param in
     * @param header
     * @param dirEntry
     * @param data
     * @throws IOException
     */
    @Override
    public void handle(ZipIn in, LocalFileHeader header, DirectoryEntry dirEntry, ByteBuffer data)
            throws IOException {
        ZipOut out = assignments.remove(normalizedFilename(header.getFilename()));
        if (out == null) {
            // Skip unassigned file; includes a file with the same name as a previously processed one.
            // This in particular picks the first .class or .dex file encountered for a given class name
            // and drops any file not matched by inputFilter.
            return;
        }
        if (dirEntry == null) {
            // Shouldn't get here, as there should be no assignment.
            System.out.println("Warning: no directory entry");
            return;
        }
        // Clone directory entry
        DirectoryEntry entryOut = out.nextEntry(dirEntry);
        if (dosTime != null) {
            // Overwrite time stamp
            header.set(LOCTIM, dosTime.time);
            entryOut.set(CENTIM, dosTime.time);
        }
        out.write(header);
        out.write(data);
        if ((header.get(LOCFLG) & LocalFileHeader.SIZE_MASKED_FLAG) != 0) {
            // Instead of this, we could fix the header with the size information
            // from the directory entry. For now, keep the entry encoded as-is.
            DataDescriptor desc = DataDescriptor.allocate().set(EXTCRC, dirEntry.get(CENCRC))
                    .set(EXTSIZ, dirEntry.get(CENSIZ)).set(EXTLEN, dirEntry.get(CENLEN));
            out.write(desc);
        }
    }

    /**
     * Writes any remaining output data to the output stream.
     *
     * @throws IOException if the output stream or the filter throws an IOException
     * @throws IllegalStateException if this method was already called earlier
     */
    public void finish() throws IOException {
        checkNotFinished();
        finished = true;
        if (resourceOut != null) {
            resourceOut.finish();
        }
        for (ZipOut zo : zipOuts) {
            zo.finish();
        }
    }

    /**
     * Writes any remaining output data to the output stream and closes it.
     *
     * @throws IOException if the output stream or the filter throws an IOException
     */
    public void close() throws IOException {
        if (!finished) {
            finish();
        }
        if (resourceOut != null) {
            resourceOut.close();
        }
        for (ZipOut zo : zipOuts) {
            zo.close();
        }
    }

    private void checkNotFinished() {
        if (finished) {
            throw new IllegalStateException();
        }
    }

    /**
     * Validates configuration before execution.
     */
    private void checkConfig() throws IOException {
        if (outputs.size() < 1) {
            throw new IllegalStateException("Require at least one output file");
        }
        filter = filterFile == null && filterInputStream == null ? null : readPaths(filterFile);
    }

    /**
     * Parses the entries and assign each entry to an output file.
     */
    private void split() {
        for (ZipIn in : inputs) {
            CentralDirectory cdir = centralDirectories.get(in.getFilename());
            for (DirectoryEntry entry : cdir.list()) {
                String filename = normalizedFilename(entry.getFilename());
                if (!inputFilter.apply(filename)) {
                    continue;
                }
                if (filename.endsWith(".class")) {
                    // Only pass classes to the splitter, so that it can do the best job
                    // possible distributing them across output files.
                    classes.add(filename);
                } else if (!filename.endsWith("/")) {
                    // Non class files (resources) are either assigned to the first
                    // output file, or to a specified resource output file.
                    assignments.put(filename, resourceOut);
                }
            }
        }
        Splitter splitter = new Splitter(outputs.size(), classes.size());
        if (filter != null) {
            // Assign files in the filter to the first output file.
            splitter.assign(Sets.filter(filter, inputFilter));
            splitter.nextShard(); // minimal initial shard
        }
        for (String path : classes) {
            // Use normalized filename so the filter file doesn't have to change
            int assignment = splitter.assign(path);
            Preconditions.checkState(assignment >= 0 && assignment < zipOuts.length);
            assignments.put(path, zipOuts[assignment]);
        }
    }

    private String normalizedFilename(String filename) {
        if (splitDexFiles && filename.endsWith(".class.dex")) { // suffix generated by DexBuilder
            return filename.substring(0, filename.length() - ".dex".length());
        }
        return filename;
    }

    /**
     * Reads paths of classes required in first shard. For testing purposes, this relies
     * on the file system configured for the {@code Zip} library class.
     */
    private Set<String> readPaths(String fileName) throws IOException {
        Set<String> paths = new HashSet<>();
        BufferedReader reader = null;
        try {
            if (filterInputStream == null) {
                filterInputStream = new FileInputStream(fileName);
            }
            reader = new BufferedReader(new InputStreamReader(filterInputStream, UTF_8));
            String line;
            while (null != (line = reader.readLine())) {
                paths.add(fixPath(line));
            }
            return paths;
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }

    // TODO(bazel-team): Got this from 'dx'. I'm not sure we need this part. Keep it for now,
    // to make sure we read the main dex list the exact same way that dx would.
    private String fixPath(String path) {
        if (File.separatorChar == '\\') {
            path = path.replace('\\', '/');
        }
        int index = path.lastIndexOf("/./");
        if (index != -1) {
            return path.substring(index + 3);
        }
        if (path.startsWith("./")) {
            return path.substring(2);
        }
        return path;
    }

    private void verbose(String msg) {
        if (verbose) {
            System.out.println(msg);
        }
    }
}