org.eclipse.jgit.lib.ObjectChecker.java Source code

Java tutorial

Introduction

Here is the source code for org.eclipse.jgit.lib.ObjectChecker.java

Source

/*
 * Copyright (C) 2008-2010, Google Inc.
 * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.lib;

import static org.eclipse.jgit.lib.Constants.DOT_GIT_MODULES;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJECT_ID_STRING_LENGTH;
import static org.eclipse.jgit.lib.Constants.OBJ_BAD;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_COMMIT;
import static org.eclipse.jgit.lib.Constants.OBJ_TAG;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_DATE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_EMAIL;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_OBJECT_SHA1;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_PARENT_SHA1;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TIMEZONE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_TREE_SHA1;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.BAD_UTF8;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.DUPLICATE_ENTRIES;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.EMPTY_NAME;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.FULL_PATHNAME;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOT;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTDOT;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.HAS_DOTGIT;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_AUTHOR;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_COMMITTER;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_EMAIL;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_OBJECT;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_SPACE_BEFORE_DATE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TAG_ENTRY;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TREE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.MISSING_TYPE_ENTRY;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.NULL_SHA1;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.TREE_NOT_SORTED;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.UNKNOWN_TYPE;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.WIN32_BAD_NAME;
import static org.eclipse.jgit.lib.ObjectChecker.ErrorType.ZERO_PADDED_FILEMODE;
import static org.eclipse.jgit.util.Paths.compare;
import static org.eclipse.jgit.util.Paths.compareSameName;
import static org.eclipse.jgit.util.RawParseUtils.nextLF;
import static org.eclipse.jgit.util.RawParseUtils.parseBase10;

import java.text.MessageFormat;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.eclipse.jgit.annotations.NonNull;
import org.eclipse.jgit.annotations.Nullable;
import org.eclipse.jgit.errors.CorruptObjectException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.util.MutableInteger;
import org.eclipse.jgit.util.RawParseUtils;
import org.eclipse.jgit.util.StringUtils;

/**
 * Verifies that an object is formatted correctly.
 * <p>
 * Verifications made by this class only check that the fields of an object are
 * formatted correctly. The ObjectId checksum of the object is not verified, and
 * connectivity links between objects are also not verified. Its assumed that
 * the caller can provide both of these validations on its own.
 * <p>
 * Instances of this class are not thread safe, but they may be reused to
 * perform multiple object validations, calling {@link #reset()} between them to
 * clear the internal state (e.g. {@link #getGitsubmodules()})
 */
public class ObjectChecker {
    /** Header "tree " */
    public static final byte[] tree = Constants.encodeASCII("tree "); //$NON-NLS-1$

    /** Header "parent " */
    public static final byte[] parent = Constants.encodeASCII("parent "); //$NON-NLS-1$

    /** Header "author " */
    public static final byte[] author = Constants.encodeASCII("author "); //$NON-NLS-1$

    /** Header "committer " */
    public static final byte[] committer = Constants.encodeASCII("committer "); //$NON-NLS-1$

    /** Header "encoding " */
    public static final byte[] encoding = Constants.encodeASCII("encoding "); //$NON-NLS-1$

    /** Header "object " */
    public static final byte[] object = Constants.encodeASCII("object "); //$NON-NLS-1$

    /** Header "type " */
    public static final byte[] type = Constants.encodeASCII("type "); //$NON-NLS-1$

    /** Header "tag " */
    public static final byte[] tag = Constants.encodeASCII("tag "); //$NON-NLS-1$

    /** Header "tagger " */
    public static final byte[] tagger = Constants.encodeASCII("tagger "); //$NON-NLS-1$

    /** Path ".gitmodules" */
    private static final byte[] dotGitmodules = Constants.encodeASCII(DOT_GIT_MODULES);

    /**
     * Potential issues identified by the checker.
     *
     * @since 4.2
     */
    public enum ErrorType {
        // @formatter:off
        // These names match git-core so that fsck section keys also match.
        /***/
        NULL_SHA1,
        /***/
        DUPLICATE_ENTRIES,
        /***/
        TREE_NOT_SORTED,
        /***/
        ZERO_PADDED_FILEMODE,
        /***/
        EMPTY_NAME,
        /***/
        FULL_PATHNAME,
        /***/
        HAS_DOT,
        /***/
        HAS_DOTDOT,
        /***/
        HAS_DOTGIT,
        /***/
        BAD_OBJECT_SHA1,
        /***/
        BAD_PARENT_SHA1,
        /***/
        BAD_TREE_SHA1,
        /***/
        MISSING_AUTHOR,
        /***/
        MISSING_COMMITTER,
        /***/
        MISSING_OBJECT,
        /***/
        MISSING_TREE,
        /***/
        MISSING_TYPE_ENTRY,
        /***/
        MISSING_TAG_ENTRY,
        /***/
        BAD_DATE,
        /***/
        BAD_EMAIL,
        /***/
        BAD_TIMEZONE,
        /***/
        MISSING_EMAIL,
        /***/
        MISSING_SPACE_BEFORE_DATE,
        /** @since 5.2 */
        GITMODULES_BLOB,
        /** @since 5.2 */
        GITMODULES_LARGE,
        /** @since 5.2 */
        GITMODULES_NAME,
        /** @since 5.2 */
        GITMODULES_PARSE,
        /** @since 5.2 */
        GITMODULES_PATH,
        /** @since 5.2 */
        GITMODULES_SYMLINK,
        /** @since 5.2 */
        GITMODULES_URL,
        /***/
        UNKNOWN_TYPE,

        // These are unique to JGit.
        /***/
        WIN32_BAD_NAME,
        /***/
        BAD_UTF8;
        // @formatter:on

        /** @return camelCaseVersion of the name. */
        public String getMessageId() {
            String n = name();
            StringBuilder r = new StringBuilder(n.length());
            for (int i = 0; i < n.length(); i++) {
                char c = n.charAt(i);
                if (c != '_') {
                    r.append(StringUtils.toLowerCase(c));
                } else {
                    r.append(n.charAt(++i));
                }
            }
            return r.toString();
        }
    }

    private final MutableObjectId tempId = new MutableObjectId();
    private final MutableInteger bufPtr = new MutableInteger();

    private EnumSet<ErrorType> errors = EnumSet.allOf(ErrorType.class);
    private ObjectIdSet skipList;
    private boolean allowInvalidPersonIdent;
    private boolean windows;
    private boolean macosx;

    private final List<GitmoduleEntry> gitsubmodules = new ArrayList<>();

    /**
     * Enable accepting specific malformed (but not horribly broken) objects.
     *
     * @param objects
     *            collection of object names known to be broken in a non-fatal
     *            way that should be ignored by the checker.
     * @return {@code this}
     * @since 4.2
     */
    public ObjectChecker setSkipList(@Nullable ObjectIdSet objects) {
        skipList = objects;
        return this;
    }

    /**
     * Configure error types to be ignored across all objects.
     *
     * @param ids
     *            error types to ignore. The caller's set is copied.
     * @return {@code this}
     * @since 4.2
     */
    public ObjectChecker setIgnore(@Nullable Set<ErrorType> ids) {
        errors = EnumSet.allOf(ErrorType.class);
        if (ids != null) {
            errors.removeAll(ids);
        }
        return this;
    }

    /**
     * Add message type to be ignored across all objects.
     *
     * @param id
     *            error type to ignore.
     * @param ignore
     *            true to ignore this error; false to treat the error as an
     *            error and throw.
     * @return {@code this}
     * @since 4.2
     */
    public ObjectChecker setIgnore(ErrorType id, boolean ignore) {
        if (ignore) {
            errors.remove(id);
        } else {
            errors.add(id);
        }
        return this;
    }

    /**
     * Enable accepting leading zero mode in tree entries.
     * <p>
     * Some broken Git libraries generated leading zeros in the mode part of
     * tree entries. This is technically incorrect but gracefully allowed by
     * git-core. JGit rejects such trees by default, but may need to accept
     * them on broken histories.
     * <p>
     * Same as {@code setIgnore(ZERO_PADDED_FILEMODE, allow)}.
     *
     * @param allow allow leading zero mode.
     * @return {@code this}.
     * @since 3.4
     */
    public ObjectChecker setAllowLeadingZeroFileMode(boolean allow) {
        return setIgnore(ZERO_PADDED_FILEMODE, allow);
    }

    /**
     * Enable accepting invalid author, committer and tagger identities.
     * <p>
     * Some broken Git versions/libraries allowed users to create commits and
     * tags with invalid formatting between the name, email and timestamp.
     *
     * @param allow
     *            if true accept invalid person identity strings.
     * @return {@code this}.
     * @since 4.0
     */
    public ObjectChecker setAllowInvalidPersonIdent(boolean allow) {
        allowInvalidPersonIdent = allow;
        return this;
    }

    /**
     * Restrict trees to only names legal on Windows platforms.
     * <p>
     * Also rejects any mixed case forms of reserved names ({@code .git}).
     *
     * @param win true if Windows name checking should be performed.
     * @return {@code this}.
     * @since 3.4
     */
    public ObjectChecker setSafeForWindows(boolean win) {
        windows = win;
        return this;
    }

    /**
     * Restrict trees to only names legal on Mac OS X platforms.
     * <p>
     * Rejects any mixed case forms of reserved names ({@code .git})
     * for users working on HFS+ in case-insensitive (default) mode.
     *
     * @param mac true if Mac OS X name checking should be performed.
     * @return {@code this}.
     * @since 3.4
     */
    public ObjectChecker setSafeForMacOS(boolean mac) {
        macosx = mac;
        return this;
    }

    /**
     * Check an object for parsing errors.
     *
     * @param objType
     *            type of the object. Must be a valid object type code in
     *            {@link org.eclipse.jgit.lib.Constants}.
     * @param raw
     *            the raw data which comprises the object. This should be in the
     *            canonical format (that is the format used to generate the
     *            ObjectId of the object). The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if an error is identified.
     */
    public void check(int objType, byte[] raw) throws CorruptObjectException {
        check(idFor(objType, raw), objType, raw);
    }

    /**
     * Check an object for parsing errors.
     *
     * @param id
     *            identify of the object being checked.
     * @param objType
     *            type of the object. Must be a valid object type code in
     *            {@link org.eclipse.jgit.lib.Constants}.
     * @param raw
     *            the raw data which comprises the object. This should be in the
     *            canonical format (that is the format used to generate the
     *            ObjectId of the object). The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if an error is identified.
     * @since 4.2
     */
    public void check(@Nullable AnyObjectId id, int objType, byte[] raw) throws CorruptObjectException {
        switch (objType) {
        case OBJ_COMMIT:
            checkCommit(id, raw);
            break;
        case OBJ_TAG:
            checkTag(id, raw);
            break;
        case OBJ_TREE:
            checkTree(id, raw);
            break;
        case OBJ_BLOB:
            BlobObjectChecker checker = newBlobObjectChecker();
            if (checker == null) {
                checkBlob(raw);
            } else {
                checker.update(raw, 0, raw.length);
                checker.endBlob(id);
            }
            break;
        default:
            report(UNKNOWN_TYPE, id,
                    MessageFormat.format(JGitText.get().corruptObjectInvalidType2, Integer.valueOf(objType)));
        }
    }

    private boolean checkId(byte[] raw) {
        int p = bufPtr.value;
        try {
            tempId.fromString(raw, p);
        } catch (IllegalArgumentException e) {
            bufPtr.value = nextLF(raw, p);
            return false;
        }

        p += OBJECT_ID_STRING_LENGTH;
        if (raw[p] == '\n') {
            bufPtr.value = p + 1;
            return true;
        }
        bufPtr.value = nextLF(raw, p);
        return false;
    }

    private void checkPersonIdent(byte[] raw, @Nullable AnyObjectId id) throws CorruptObjectException {
        if (allowInvalidPersonIdent) {
            bufPtr.value = nextLF(raw, bufPtr.value);
            return;
        }

        final int emailB = nextLF(raw, bufPtr.value, '<');
        if (emailB == bufPtr.value || raw[emailB - 1] != '<') {
            report(MISSING_EMAIL, id, JGitText.get().corruptObjectMissingEmail);
            bufPtr.value = nextLF(raw, bufPtr.value);
            return;
        }

        final int emailE = nextLF(raw, emailB, '>');
        if (emailE == emailB || raw[emailE - 1] != '>') {
            report(BAD_EMAIL, id, JGitText.get().corruptObjectBadEmail);
            bufPtr.value = nextLF(raw, bufPtr.value);
            return;
        }
        if (emailE == raw.length || raw[emailE] != ' ') {
            report(MISSING_SPACE_BEFORE_DATE, id, JGitText.get().corruptObjectBadDate);
            bufPtr.value = nextLF(raw, bufPtr.value);
            return;
        }

        parseBase10(raw, emailE + 1, bufPtr); // when
        if (emailE + 1 == bufPtr.value || bufPtr.value == raw.length || raw[bufPtr.value] != ' ') {
            report(BAD_DATE, id, JGitText.get().corruptObjectBadDate);
            bufPtr.value = nextLF(raw, bufPtr.value);
            return;
        }

        int p = bufPtr.value + 1;
        parseBase10(raw, p, bufPtr); // tz offset
        if (p == bufPtr.value) {
            report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
            bufPtr.value = nextLF(raw, bufPtr.value);
            return;
        }

        p = bufPtr.value;
        if (raw[p] == '\n') {
            bufPtr.value = p + 1;
        } else {
            report(BAD_TIMEZONE, id, JGitText.get().corruptObjectBadTimezone);
            bufPtr.value = nextLF(raw, p);
        }
    }

    /**
     * Check a commit for errors.
     *
     * @param raw
     *            the commit data. The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if any error was detected.
     */
    public void checkCommit(byte[] raw) throws CorruptObjectException {
        checkCommit(idFor(OBJ_COMMIT, raw), raw);
    }

    /**
     * Check a commit for errors.
     *
     * @param id
     *            identity of the object being checked.
     * @param raw
     *            the commit data. The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if any error was detected.
     * @since 4.2
     */
    public void checkCommit(@Nullable AnyObjectId id, byte[] raw) throws CorruptObjectException {
        bufPtr.value = 0;

        if (!match(raw, tree)) {
            report(MISSING_TREE, id, JGitText.get().corruptObjectNotreeHeader);
        } else if (!checkId(raw)) {
            report(BAD_TREE_SHA1, id, JGitText.get().corruptObjectInvalidTree);
        }

        while (match(raw, parent)) {
            if (!checkId(raw)) {
                report(BAD_PARENT_SHA1, id, JGitText.get().corruptObjectInvalidParent);
            }
        }

        if (match(raw, author)) {
            checkPersonIdent(raw, id);
        } else {
            report(MISSING_AUTHOR, id, JGitText.get().corruptObjectNoAuthor);
        }

        if (match(raw, committer)) {
            checkPersonIdent(raw, id);
        } else {
            report(MISSING_COMMITTER, id, JGitText.get().corruptObjectNoCommitter);
        }
    }

    /**
     * Check an annotated tag for errors.
     *
     * @param raw
     *            the tag data. The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if any error was detected.
     */
    public void checkTag(byte[] raw) throws CorruptObjectException {
        checkTag(idFor(OBJ_TAG, raw), raw);
    }

    /**
     * Check an annotated tag for errors.
     *
     * @param id
     *            identity of the object being checked.
     * @param raw
     *            the tag data. The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if any error was detected.
     * @since 4.2
     */
    public void checkTag(@Nullable AnyObjectId id, byte[] raw) throws CorruptObjectException {
        bufPtr.value = 0;
        if (!match(raw, object)) {
            report(MISSING_OBJECT, id, JGitText.get().corruptObjectNoObjectHeader);
        } else if (!checkId(raw)) {
            report(BAD_OBJECT_SHA1, id, JGitText.get().corruptObjectInvalidObject);
        }

        if (!match(raw, type)) {
            report(MISSING_TYPE_ENTRY, id, JGitText.get().corruptObjectNoTypeHeader);
        }
        bufPtr.value = nextLF(raw, bufPtr.value);

        if (!match(raw, tag)) {
            report(MISSING_TAG_ENTRY, id, JGitText.get().corruptObjectNoTagHeader);
        }
        bufPtr.value = nextLF(raw, bufPtr.value);

        if (match(raw, tagger)) {
            checkPersonIdent(raw, id);
        }
    }

    private static boolean duplicateName(final byte[] raw, final int thisNamePos, final int thisNameEnd) {
        final int sz = raw.length;
        int nextPtr = thisNameEnd + 1 + Constants.OBJECT_ID_LENGTH;
        for (;;) {
            int nextMode = 0;
            for (;;) {
                if (nextPtr >= sz)
                    return false;
                final byte c = raw[nextPtr++];
                if (' ' == c)
                    break;
                nextMode <<= 3;
                nextMode += c - '0';
            }

            final int nextNamePos = nextPtr;
            for (;;) {
                if (nextPtr == sz)
                    return false;
                final byte c = raw[nextPtr++];
                if (c == 0)
                    break;
            }
            if (nextNamePos + 1 == nextPtr)
                return false;

            int cmp = compareSameName(raw, thisNamePos, thisNameEnd, raw, nextNamePos, nextPtr - 1, nextMode);
            if (cmp < 0)
                return false;
            else if (cmp == 0)
                return true;

            nextPtr += Constants.OBJECT_ID_LENGTH;
        }
    }

    /**
     * Check a canonical formatted tree for errors.
     *
     * @param raw
     *            the raw tree data. The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if any error was detected.
     */
    public void checkTree(byte[] raw) throws CorruptObjectException {
        checkTree(idFor(OBJ_TREE, raw), raw);
    }

    /**
     * Check a canonical formatted tree for errors.
     *
     * @param id
     *            identity of the object being checked.
     * @param raw
     *            the raw tree data. The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if any error was detected.
     * @since 4.2
     */
    public void checkTree(@Nullable AnyObjectId id, byte[] raw) throws CorruptObjectException {
        final int sz = raw.length;
        int ptr = 0;
        int lastNameB = 0, lastNameE = 0, lastMode = 0;
        Set<String> normalized = windows || macosx ? new HashSet<>() : null;

        while (ptr < sz) {
            int thisMode = 0;
            for (;;) {
                if (ptr == sz) {
                    throw new CorruptObjectException(JGitText.get().corruptObjectTruncatedInMode);
                }
                final byte c = raw[ptr++];
                if (' ' == c)
                    break;
                if (c < '0' || c > '7') {
                    throw new CorruptObjectException(JGitText.get().corruptObjectInvalidModeChar);
                }
                if (thisMode == 0 && c == '0') {
                    report(ZERO_PADDED_FILEMODE, id, JGitText.get().corruptObjectInvalidModeStartsZero);
                }
                thisMode <<= 3;
                thisMode += c - '0';
            }

            if (FileMode.fromBits(thisMode).getObjectType() == OBJ_BAD) {
                throw new CorruptObjectException(
                        MessageFormat.format(JGitText.get().corruptObjectInvalidMode2, Integer.valueOf(thisMode)));
            }

            final int thisNameB = ptr;
            ptr = scanPathSegment(raw, ptr, sz, id);
            if (ptr == sz || raw[ptr] != 0) {
                throw new CorruptObjectException(JGitText.get().corruptObjectTruncatedInName);
            }
            checkPathSegment2(raw, thisNameB, ptr, id);
            if (normalized != null) {
                if (!normalized.add(normalize(raw, thisNameB, ptr))) {
                    report(DUPLICATE_ENTRIES, id, JGitText.get().corruptObjectDuplicateEntryNames);
                }
            } else if (duplicateName(raw, thisNameB, ptr)) {
                report(DUPLICATE_ENTRIES, id, JGitText.get().corruptObjectDuplicateEntryNames);
            }

            if (lastNameB != 0) {
                int cmp = compare(raw, lastNameB, lastNameE, lastMode, raw, thisNameB, ptr, thisMode);
                if (cmp > 0) {
                    report(TREE_NOT_SORTED, id, JGitText.get().corruptObjectIncorrectSorting);
                }
            }

            lastNameB = thisNameB;
            lastNameE = ptr;
            lastMode = thisMode;

            ptr += 1 + OBJECT_ID_LENGTH;
            if (ptr > sz) {
                throw new CorruptObjectException(JGitText.get().corruptObjectTruncatedInObjectId);
            }

            if (ObjectId.zeroId().compareTo(raw, ptr - OBJECT_ID_LENGTH) == 0) {
                report(NULL_SHA1, id, JGitText.get().corruptObjectZeroId);
            }

            if (id != null && isGitmodules(raw, lastNameB, lastNameE, id)) {
                ObjectId blob = ObjectId.fromRaw(raw, ptr - OBJECT_ID_LENGTH);
                gitsubmodules.add(new GitmoduleEntry(id, blob));
            }
        }
    }

    private int scanPathSegment(byte[] raw, int ptr, int end, @Nullable AnyObjectId id)
            throws CorruptObjectException {
        for (; ptr < end; ptr++) {
            byte c = raw[ptr];
            if (c == 0) {
                return ptr;
            }
            if (c == '/') {
                report(FULL_PATHNAME, id, JGitText.get().corruptObjectNameContainsSlash);
            }
            if (windows && isInvalidOnWindows(c)) {
                if (c > 31) {
                    throw new CorruptObjectException(
                            String.format(JGitText.get().corruptObjectNameContainsChar, Byte.valueOf(c)));
                }
                throw new CorruptObjectException(
                        String.format(JGitText.get().corruptObjectNameContainsByte, Integer.valueOf(c & 0xff)));
            }
        }
        return ptr;
    }

    @Nullable
    private ObjectId idFor(int objType, byte[] raw) {
        if (skipList != null) {
            try (ObjectInserter.Formatter fmt = new ObjectInserter.Formatter()) {
                return fmt.idFor(objType, raw);
            }
        }
        return null;
    }

    private void report(@NonNull ErrorType err, @Nullable AnyObjectId id, String why)
            throws CorruptObjectException {
        if (errors.contains(err) && (id == null || skipList == null || !skipList.contains(id))) {
            if (id != null) {
                throw new CorruptObjectException(err, id, why);
            }
            throw new CorruptObjectException(why);
        }
    }

    /**
     * Check tree path entry for validity.
     * <p>
     * Unlike {@link #checkPathSegment(byte[], int, int)}, this version scans a
     * multi-directory path string such as {@code "src/main.c"}.
     *
     * @param path
     *            path string to scan.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             path is invalid.
     * @since 3.6
     */
    public void checkPath(String path) throws CorruptObjectException {
        byte[] buf = Constants.encode(path);
        checkPath(buf, 0, buf.length);
    }

    /**
     * Check tree path entry for validity.
     * <p>
     * Unlike {@link #checkPathSegment(byte[], int, int)}, this version scans a
     * multi-directory path string such as {@code "src/main.c"}.
     *
     * @param raw
     *            buffer to scan.
     * @param ptr
     *            offset to first byte of the name.
     * @param end
     *            offset to one past last byte of name.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             path is invalid.
     * @since 3.6
     */
    public void checkPath(byte[] raw, int ptr, int end) throws CorruptObjectException {
        int start = ptr;
        for (; ptr < end; ptr++) {
            if (raw[ptr] == '/') {
                checkPathSegment(raw, start, ptr);
                start = ptr + 1;
            }
        }
        checkPathSegment(raw, start, end);
    }

    /**
     * Check tree path entry for validity.
     *
     * @param raw
     *            buffer to scan.
     * @param ptr
     *            offset to first byte of the name.
     * @param end
     *            offset to one past last byte of name.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             name is invalid.
     * @since 3.4
     */
    public void checkPathSegment(byte[] raw, int ptr, int end) throws CorruptObjectException {
        int e = scanPathSegment(raw, ptr, end, null);
        if (e < end && raw[e] == 0)
            throw new CorruptObjectException(JGitText.get().corruptObjectNameContainsNullByte);
        checkPathSegment2(raw, ptr, end, null);
    }

    private void checkPathSegment2(byte[] raw, int ptr, int end, @Nullable AnyObjectId id)
            throws CorruptObjectException {
        if (ptr == end) {
            report(EMPTY_NAME, id, JGitText.get().corruptObjectNameZeroLength);
            return;
        }

        if (raw[ptr] == '.') {
            switch (end - ptr) {
            case 1:
                report(HAS_DOT, id, JGitText.get().corruptObjectNameDot);
                break;
            case 2:
                if (raw[ptr + 1] == '.') {
                    report(HAS_DOTDOT, id, JGitText.get().corruptObjectNameDotDot);
                }
                break;
            case 4:
                if (isGit(raw, ptr + 1)) {
                    report(HAS_DOTGIT, id, String.format(JGitText.get().corruptObjectInvalidName,
                            RawParseUtils.decode(raw, ptr, end)));
                }
                break;
            default:
                if (end - ptr > 4 && isNormalizedGit(raw, ptr + 1, end)) {
                    report(HAS_DOTGIT, id, String.format(JGitText.get().corruptObjectInvalidName,
                            RawParseUtils.decode(raw, ptr, end)));
                }
            }
        } else if (isGitTilde1(raw, ptr, end)) {
            report(HAS_DOTGIT, id,
                    String.format(JGitText.get().corruptObjectInvalidName, RawParseUtils.decode(raw, ptr, end)));
        }
        if (macosx && isMacHFSGit(raw, ptr, end, id)) {
            report(HAS_DOTGIT, id, String.format(JGitText.get().corruptObjectInvalidNameIgnorableUnicode,
                    RawParseUtils.decode(raw, ptr, end)));
        }

        if (windows) {
            // Windows ignores space and dot at end of file name.
            if (raw[end - 1] == ' ' || raw[end - 1] == '.') {
                report(WIN32_BAD_NAME, id, String.format(JGitText.get().corruptObjectInvalidNameEnd,
                        Character.valueOf(((char) raw[end - 1]))));
            }
            if (end - ptr >= 3) {
                checkNotWindowsDevice(raw, ptr, end, id);
            }
        }
    }

    // Mac's HFS+ folds permutations of ".git" and Unicode ignorable characters
    // to ".git" therefore we should prevent such names
    private boolean isMacHFSPath(byte[] raw, int ptr, int end, byte[] path, @Nullable AnyObjectId id)
            throws CorruptObjectException {
        boolean ignorable = false;
        int g = 0;
        while (ptr < end) {
            switch (raw[ptr]) {
            case (byte) 0xe2: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192
                if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
                    return false;
                }
                switch (raw[ptr + 1]) {
                case (byte) 0x80:
                    switch (raw[ptr + 2]) {
                    case (byte) 0x8c: // U+200C 0xe2808c ZERO WIDTH NON-JOINER
                    case (byte) 0x8d: // U+200D 0xe2808d ZERO WIDTH JOINER
                    case (byte) 0x8e: // U+200E 0xe2808e LEFT-TO-RIGHT MARK
                    case (byte) 0x8f: // U+200F 0xe2808f RIGHT-TO-LEFT MARK
                    case (byte) 0xaa: // U+202A 0xe280aa LEFT-TO-RIGHT EMBEDDING
                    case (byte) 0xab: // U+202B 0xe280ab RIGHT-TO-LEFT EMBEDDING
                    case (byte) 0xac: // U+202C 0xe280ac POP DIRECTIONAL FORMATTING
                    case (byte) 0xad: // U+202D 0xe280ad LEFT-TO-RIGHT OVERRIDE
                    case (byte) 0xae: // U+202E 0xe280ae RIGHT-TO-LEFT OVERRIDE
                        ignorable = true;
                        ptr += 3;
                        continue;
                    default:
                        return false;
                    }
                case (byte) 0x81:
                    switch (raw[ptr + 2]) {
                    case (byte) 0xaa: // U+206A 0xe281aa INHIBIT SYMMETRIC SWAPPING
                    case (byte) 0xab: // U+206B 0xe281ab ACTIVATE SYMMETRIC SWAPPING
                    case (byte) 0xac: // U+206C 0xe281ac INHIBIT ARABIC FORM SHAPING
                    case (byte) 0xad: // U+206D 0xe281ad ACTIVATE ARABIC FORM SHAPING
                    case (byte) 0xae: // U+206E 0xe281ae NATIONAL DIGIT SHAPES
                    case (byte) 0xaf: // U+206F 0xe281af NOMINAL DIGIT SHAPES
                        ignorable = true;
                        ptr += 3;
                        continue;
                    default:
                        return false;
                    }
                default:
                    return false;
                }
            case (byte) 0xef: // http://www.utf8-chartable.de/unicode-utf8-table.pl?start=65024
                if (!checkTruncatedIgnorableUTF8(raw, ptr, end, id)) {
                    return false;
                }
                // U+FEFF 0xefbbbf ZERO WIDTH NO-BREAK SPACE
                if ((raw[ptr + 1] == (byte) 0xbb) && (raw[ptr + 2] == (byte) 0xbf)) {
                    ignorable = true;
                    ptr += 3;
                    continue;
                }
                return false;
            default:
                if (g == path.length) {
                    return false;
                }
                if (toLower(raw[ptr++]) != path[g++]) {
                    return false;
                }
            }
        }
        if (g == path.length && ignorable) {
            return true;
        }
        return false;
    }

    private boolean isMacHFSGit(byte[] raw, int ptr, int end, @Nullable AnyObjectId id)
            throws CorruptObjectException {
        byte[] git = new byte[] { '.', 'g', 'i', 't' };
        return isMacHFSPath(raw, ptr, end, git, id);
    }

    private boolean isMacHFSGitmodules(byte[] raw, int ptr, int end, @Nullable AnyObjectId id)
            throws CorruptObjectException {
        return isMacHFSPath(raw, ptr, end, dotGitmodules, id);
    }

    private boolean checkTruncatedIgnorableUTF8(byte[] raw, int ptr, int end, @Nullable AnyObjectId id)
            throws CorruptObjectException {
        if ((ptr + 2) >= end) {
            report(BAD_UTF8, id, MessageFormat.format(JGitText.get().corruptObjectInvalidNameInvalidUtf8,
                    toHexString(raw, ptr, end)));
            return false;
        }
        return true;
    }

    private static String toHexString(byte[] raw, int ptr, int end) {
        StringBuilder b = new StringBuilder("0x"); //$NON-NLS-1$
        for (int i = ptr; i < end; i++)
            b.append(String.format("%02x", Byte.valueOf(raw[i]))); //$NON-NLS-1$
        return b.toString();
    }

    private void checkNotWindowsDevice(byte[] raw, int ptr, int end, @Nullable AnyObjectId id)
            throws CorruptObjectException {
        switch (toLower(raw[ptr])) {
        case 'a': // AUX
            if (end - ptr >= 3 && toLower(raw[ptr + 1]) == 'u' && toLower(raw[ptr + 2]) == 'x'
                    && (end - ptr == 3 || raw[ptr + 3] == '.')) {
                report(WIN32_BAD_NAME, id, JGitText.get().corruptObjectInvalidNameAux);
            }
            break;

        case 'c': // CON, COM[1-9]
            if (end - ptr >= 3 && toLower(raw[ptr + 2]) == 'n' && toLower(raw[ptr + 1]) == 'o'
                    && (end - ptr == 3 || raw[ptr + 3] == '.')) {
                report(WIN32_BAD_NAME, id, JGitText.get().corruptObjectInvalidNameCon);
            }
            if (end - ptr >= 4 && toLower(raw[ptr + 2]) == 'm' && toLower(raw[ptr + 1]) == 'o'
                    && isPositiveDigit(raw[ptr + 3]) && (end - ptr == 4 || raw[ptr + 4] == '.')) {
                report(WIN32_BAD_NAME, id, String.format(JGitText.get().corruptObjectInvalidNameCom,
                        Character.valueOf(((char) raw[ptr + 3]))));
            }
            break;

        case 'l': // LPT[1-9]
            if (end - ptr >= 4 && toLower(raw[ptr + 1]) == 'p' && toLower(raw[ptr + 2]) == 't'
                    && isPositiveDigit(raw[ptr + 3]) && (end - ptr == 4 || raw[ptr + 4] == '.')) {
                report(WIN32_BAD_NAME, id, String.format(JGitText.get().corruptObjectInvalidNameLpt,
                        Character.valueOf(((char) raw[ptr + 3]))));
            }
            break;

        case 'n': // NUL
            if (end - ptr >= 3 && toLower(raw[ptr + 1]) == 'u' && toLower(raw[ptr + 2]) == 'l'
                    && (end - ptr == 3 || raw[ptr + 3] == '.')) {
                report(WIN32_BAD_NAME, id, JGitText.get().corruptObjectInvalidNameNul);
            }
            break;

        case 'p': // PRN
            if (end - ptr >= 3 && toLower(raw[ptr + 1]) == 'r' && toLower(raw[ptr + 2]) == 'n'
                    && (end - ptr == 3 || raw[ptr + 3] == '.')) {
                report(WIN32_BAD_NAME, id, JGitText.get().corruptObjectInvalidNamePrn);
            }
            break;
        }
    }

    private static boolean isInvalidOnWindows(byte c) {
        // Windows disallows "special" characters in a path component.
        switch (c) {
        case '"':
        case '*':
        case ':':
        case '<':
        case '>':
        case '?':
        case '\\':
        case '|':
            return true;
        }
        return 1 <= c && c <= 31;
    }

    private static boolean isGit(byte[] buf, int p) {
        return toLower(buf[p]) == 'g' && toLower(buf[p + 1]) == 'i' && toLower(buf[p + 2]) == 't';
    }

    /**
     * Check if the filename contained in buf[start:end] could be read as a
     * .gitmodules file when checked out to the working directory.
     *
     * This ought to be a simple comparison, but some filesystems have peculiar
     * rules for normalizing filenames:
     *
     * NTFS has backward-compatibility support for 8.3 synonyms of long file
     * names (see
     * https://web.archive.org/web/20160318181041/https://usn.pw/blog/gen/2015/06/09/filenames/
     * for details). NTFS is also case-insensitive.
     *
     * MacOS's HFS+ folds away ignorable Unicode characters in addition to case
     * folding.
     *
     * @param buf
     *            byte array to decode
     * @param start
     *            position where a supposed filename is starting
     * @param end
     *            position where a supposed filename is ending
     * @param id
     *            object id for error reporting
     *
     * @return true if the filename in buf could be a ".gitmodules" file
     * @throws CorruptObjectException
     */
    private boolean isGitmodules(byte[] buf, int start, int end, @Nullable AnyObjectId id)
            throws CorruptObjectException {
        // Simple cases first.
        if (end - start < 8) {
            return false;
        }
        return (end - start == dotGitmodules.length && RawParseUtils.match(buf, start, dotGitmodules) != -1)
                || (macosx && isMacHFSGitmodules(buf, start, end, id))
                || (windows && isNTFSGitmodules(buf, start, end));
    }

    private boolean matchLowerCase(byte[] b, int ptr, byte[] src) {
        if (ptr + src.length > b.length) {
            return false;
        }
        for (int i = 0; i < src.length; i++, ptr++) {
            if (toLower(b[ptr]) != src[i]) {
                return false;
            }
        }
        return true;
    }

    // .gitmodules, case-insensitive, or an 8.3 abbreviation of the same.
    private boolean isNTFSGitmodules(byte[] buf, int start, int end) {
        if (end - start == 11) {
            return matchLowerCase(buf, start, dotGitmodules);
        }

        if (end - start != 8) {
            return false;
        }

        // "gitmod" or a prefix of "gi7eba", followed by...
        byte[] gitmod = new byte[] { 'g', 'i', 't', 'm', 'o', 'd', '~' };
        if (matchLowerCase(buf, start, gitmod)) {
            start += 6;
        } else {
            byte[] gi7eba = new byte[] { 'g', 'i', '7', 'e', 'b', 'a' };
            for (int i = 0; i < gi7eba.length; i++, start++) {
                byte c = (byte) toLower(buf[start]);
                if (c == '~') {
                    break;
                }
                if (c != gi7eba[i]) {
                    return false;
                }
            }
        }

        // ... ~ and a number
        if (end - start < 2) {
            return false;
        }
        if (buf[start] != '~') {
            return false;
        }
        start++;
        if (buf[start] < '1' || buf[start] > '9') {
            return false;
        }
        start++;
        for (; start != end; start++) {
            if (buf[start] < '0' || buf[start] > '9') {
                return false;
            }
        }
        return true;
    }

    private static boolean isGitTilde1(byte[] buf, int p, int end) {
        if (end - p != 5)
            return false;
        return toLower(buf[p]) == 'g' && toLower(buf[p + 1]) == 'i' && toLower(buf[p + 2]) == 't'
                && buf[p + 3] == '~' && buf[p + 4] == '1';
    }

    private static boolean isNormalizedGit(byte[] raw, int ptr, int end) {
        if (isGit(raw, ptr)) {
            int dots = 0;
            boolean space = false;
            int p = end - 1;
            for (; (ptr + 2) < p; p--) {
                if (raw[p] == '.')
                    dots++;
                else if (raw[p] == ' ')
                    space = true;
                else
                    break;
            }
            return p == ptr + 2 && (dots == 1 || space);
        }
        return false;
    }

    private boolean match(byte[] b, byte[] src) {
        int r = RawParseUtils.match(b, bufPtr.value, src);
        if (r < 0) {
            return false;
        }
        bufPtr.value = r;
        return true;
    }

    private static char toLower(byte b) {
        if ('A' <= b && b <= 'Z')
            return (char) (b + ('a' - 'A'));
        return (char) b;
    }

    private static boolean isPositiveDigit(byte b) {
        return '1' <= b && b <= '9';
    }

    /**
     * Create a new {@link org.eclipse.jgit.lib.BlobObjectChecker}.
     *
     * @return new BlobObjectChecker or null if it's not provided.
     * @since 4.9
     */
    @Nullable
    public BlobObjectChecker newBlobObjectChecker() {
        return null;
    }

    /**
     * Check a blob for errors.
     *
     * <p>
     * This may not be called from PackParser in some cases. Use
     * {@link #newBlobObjectChecker} instead.
     *
     * @param raw
     *            the blob data. The array is never modified.
     * @throws org.eclipse.jgit.errors.CorruptObjectException
     *             if any error was detected.
     */
    public void checkBlob(byte[] raw) throws CorruptObjectException {
        // We can always assume the blob is valid.
    }

    private String normalize(byte[] raw, int ptr, int end) {
        String n = RawParseUtils.decode(raw, ptr, end).toLowerCase(Locale.US);
        return macosx ? Normalizer.normalize(n, Normalizer.Form.NFC) : n;
    }

    /**
     * Get the list of ".gitmodules" files found in the pack. For each, report
     * its blob id (e.g. to validate its contents) and the tree where it was
     * found (e.g. to check if it is in the root)
     *
     * @return List of pairs of ids {@literal <tree, blob>}.
     *
     * @since 4.7.5
     */
    public List<GitmoduleEntry> getGitsubmodules() {
        return gitsubmodules;
    }

    /**
     * Reset the invocation-specific state from this instance. Specifically this
     * clears the list of .gitmodules files encountered (see
     * {@link #getGitsubmodules()})
     *
     * Configurations like errors to filter, skip lists or the specified O.S.
     * (set via {@link #setSafeForMacOS(boolean)} or
     * {@link #setSafeForWindows(boolean)}) are NOT cleared.
     *
     * @since 5.2
     */
    public void reset() {
        gitsubmodules.clear();
    }
}