// Java tutorial
/* * SonarQube * Copyright (C) 2009-2017 SonarSource SA * mailto:info AT sonarsource DOT com * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package org.sonar.api.batch.fs.internal; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import javax.annotation.CheckForNull; import javax.annotation.Nullable; import org.apache.commons.codec.binary.Hex; import org.apache.commons.codec.digest.DigestUtils; import org.sonar.api.CoreProperties; import org.sonar.api.batch.ScannerSide; import org.sonar.api.batch.fs.InputFile; import org.sonar.api.utils.log.Logger; import org.sonar.api.utils.log.Loggers; /** * Computes hash of files. Ends of Lines are ignored, so files with * same content but different EOL encoding have the same hash. 
*/
@ScannerSide
public class FileMetadata {

  private static final Logger LOG = Loggers.get(FileMetadata.class);

  private static final char LINE_FEED = '\n';
  private static final char CARRIAGE_RETURN = '\r';

  /**
   * Callback invoked by {@link #readFile(InputStream, Charset, String, CharHandler[])} for each
   * character and line boundary of the stream being read. Subclasses override only the events
   * they care about; all default implementations are no-ops.
   */
  public abstract static class CharHandler {

    /** Called for every character of the stream, including end-of-line characters. */
    protected void handleAll(char c) {
    }

    /** Called for every character except end-of-line characters ('\n' and '\r'). */
    protected void handleIgnoreEoL(char c) {
    }

    /** Called once per line terminator, whatever its style ("\n", "\r" or "\r\n"). */
    protected void newLine() {
    }

    /** Called exactly once, after the last character of the stream has been handled. */
    protected void eof() {
    }
  }

  /**
   * Counts total lines and non-blank lines, and warns (at most once per file) when the Unicode
   * replacement character U+FFFD is seen — a sign the file was decoded with the wrong encoding.
   */
  private static class LineCounter extends CharHandler {
    private int lines = 1;
    private int nonBlankLines = 0;
    private boolean blankLine = true;
    private boolean alreadyLoggedInvalidCharacter = false;
    private final String filePath;
    private final Charset encoding;

    LineCounter(String filePath, Charset encoding) {
      this.filePath = filePath;
      this.encoding = encoding;
    }

    @Override
    protected void handleAll(char c) {
      // '\ufffd' is inserted by the decoder in place of malformed/unmappable input
      if (!alreadyLoggedInvalidCharacter && c == '\ufffd') {
        LOG.warn(
          "Invalid character encountered in file {} at line {} for encoding {}. Please fix file content or configure the encoding to be used using property '{}'.",
          filePath, lines, encoding, CoreProperties.ENCODING_PROPERTY);
        alreadyLoggedInvalidCharacter = true;
      }
    }

    @Override
    protected void newLine() {
      lines++;
      if (!blankLine) {
        nonBlankLines++;
      }
      blankLine = true;
    }

    @Override
    protected void handleIgnoreEoL(char c) {
      if (!Character.isWhitespace(c)) {
        blankLine = false;
      }
    }

    @Override
    protected void eof() {
      // the last line has no trailing terminator, so account for it here
      if (!blankLine) {
        nonBlankLines++;
      }
    }

    public int lines() {
      return lines;
    }

    public int nonBlankLines() {
      return nonBlankLines;
    }
  }

  /**
   * Computes a single MD5 hash of the whole file content, normalizing every line terminator to
   * '\n' so that files differing only by EOL style produce the same hash.
   */
  private static class FileHashComputer extends CharHandler {
    private final MessageDigest globalMd5Digest = DigestUtils.getMd5Digest();
    private final StringBuilder sb = new StringBuilder();
    private final CharsetEncoder encoder;
    private final String filePath;

    public FileHashComputer(String filePath) {
      // replace malformed/unmappable characters instead of failing, mirroring the decoder side
      encoder = StandardCharsets.UTF_8.newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
      this.filePath = filePath;
    }

    @Override
    protected void handleIgnoreEoL(char c) {
      sb.append(c);
    }

    @Override
    protected void newLine() {
      // normalize any EOL style to a single LINE_FEED before hashing
      sb.append(LINE_FEED);
      processBuffer();
      sb.setLength(0);
    }

    @Override
    protected void eof() {
      if (sb.length() > 0) {
        processBuffer();
      }
    }

    private void processBuffer() {
      try {
        if (sb.length() > 0) {
          ByteBuffer encoded = encoder.encode(CharBuffer.wrap(sb));
          globalMd5Digest.update(encoded.array(), 0, encoded.limit());
        }
      } catch (CharacterCodingException e) {
        throw new IllegalStateException("Error encoding line hash in file: " + filePath, e);
      }
    }

    /**
     * @return the hex-encoded MD5 hash of the normalized content; never null.
     *         (The previous {@code @CheckForNull} annotation was misleading: this method
     *         unconditionally returns the result of {@code Hex.encodeHexString}.)
     */
    public String getHash() {
      return Hex.encodeHexString(globalMd5Digest.digest());
    }
  }

  /**
   * Computes an MD5 hash of each line (all whitespace stripped) and feeds it to a
   * {@link LineHashConsumer}. Blank lines produce no callback because {@link #processBuffer()}
   * skips empty buffers.
   */
  private static class LineHashComputer extends CharHandler {
    private final MessageDigest lineMd5Digest = DigestUtils.getMd5Digest();
    private final CharsetEncoder encoder;
    private final StringBuilder sb = new StringBuilder();
    private final LineHashConsumer consumer;
    private final File file;
    private int line = 1;

    public LineHashComputer(LineHashConsumer consumer, File f) {
      this.consumer = consumer;
      this.file = f;
      this.encoder = StandardCharsets.UTF_8.newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
    }

    @Override
    protected void handleIgnoreEoL(char c) {
      // whitespace is stripped so line hashes stay stable across re-indentation
      if (!Character.isWhitespace(c)) {
        sb.append(c);
      }
    }

    @Override
    protected void newLine() {
      processBuffer();
      sb.setLength(0);
      line++;
    }

    @Override
    protected void eof() {
      // 'line' starts at 1 and only increments, so the former guard "this.line > 0" was
      // always true; flush the final (terminator-less) line unconditionally.
      processBuffer();
    }

    private void processBuffer() {
      try {
        if (sb.length() > 0) {
          ByteBuffer encoded = encoder.encode(CharBuffer.wrap(sb));
          lineMd5Digest.update(encoded.array(), 0, encoded.limit());
          consumer.consume(line, lineMd5Digest.digest());
        }
      } catch (CharacterCodingException e) {
        throw new IllegalStateException("Error encoding line hash in file: " + file.getAbsolutePath(), e);
      }
    }
  }

  /**
   * Records the character offset at which each line starts, plus the offset just past the last
   * character of the file. Offsets are tracked as long and range-checked before narrowing to int.
   */
  private static class LineOffsetCounter extends CharHandler {
    private long currentOriginalOffset = 0;
    private final IntArrayList originalLineOffsets = new IntArrayList();
    private long lastValidOffset = 0;

    public LineOffsetCounter() {
      // the first line always starts at offset 0
      originalLineOffsets.add(0);
    }

    @Override
    protected void handleAll(char c) {
      currentOriginalOffset++;
    }

    @Override
    protected void newLine() {
      if (currentOriginalOffset > Integer.MAX_VALUE) {
        throw new IllegalStateException("File is too big: " + currentOriginalOffset);
      }
      originalLineOffsets.add((int) currentOriginalOffset);
    }

    @Override
    protected void eof() {
      lastValidOffset = currentOriginalOffset;
    }

    public int[] getOriginalLineOffsets() {
      return originalLineOffsets.trimAndGet();
    }

    public int getLastValidOffset() {
      if (lastValidOffset > Integer.MAX_VALUE) {
        throw new IllegalStateException("File is too big: " + lastValidOffset);
      }
      return (int) lastValidOffset;
    }
  }

  /**
   * Compute hash of a file ignoring line ends differences.
   * Maximum performance is needed.
   *
   * @param stream       content to analyze; closed by this method
   * @param encoding     charset used to decode the stream
   * @param filePath     path used in log/error messages only
   * @param otherHandler optional extra handler notified alongside the built-in ones
   * @return line counts, file hash and line offsets for the stream
   */
  public Metadata readMetadata(InputStream stream, Charset encoding, String filePath, @Nullable CharHandler otherHandler) {
    LineCounter lineCounter = new LineCounter(filePath, encoding);
    FileHashComputer fileHashComputer = new FileHashComputer(filePath);
    LineOffsetCounter lineOffsetCounter = new LineOffsetCounter();
    if (otherHandler != null) {
      CharHandler[] handlers = {lineCounter, fileHashComputer, lineOffsetCounter, otherHandler};
      readFile(stream, encoding, filePath, handlers);
    } else {
      CharHandler[] handlers = {lineCounter, fileHashComputer, lineOffsetCounter};
      readFile(stream, encoding, filePath, handlers);
    }
    return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(),
      lineOffsetCounter.getOriginalLineOffsets(), lineOffsetCounter.getLastValidOffset());
  }

  /** Convenience overload of {@link #readMetadata(InputStream, Charset, String, CharHandler)} with no extra handler. */
  public Metadata readMetadata(InputStream stream, Charset encoding, String filePath) {
    return readMetadata(stream, encoding, filePath, null);
  }

  /**
   * For testing purpose
   */
  public Metadata readMetadata(Reader reader) {
    LineCounter lineCounter = new LineCounter("fromString", StandardCharsets.UTF_16);
    FileHashComputer fileHashComputer = new FileHashComputer("fromString");
    LineOffsetCounter lineOffsetCounter = new LineOffsetCounter();
    CharHandler[] handlers = {lineCounter, fileHashComputer, lineOffsetCounter};
    try {
      read(reader, handlers);
    } catch (IOException e) {
      throw new IllegalStateException("Should never occur", e);
    }
    return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(),
      lineOffsetCounter.getOriginalLineOffsets(), lineOffsetCounter.getLastValidOffset());
  }

  /**
   * Decodes {@code stream} with {@code encoding} and drives the given handlers over its content.
   * The stream is closed on return.
   *
   * @throws IllegalStateException wrapping any {@link IOException}, with the file path and encoding in the message
   */
  public static void readFile(InputStream stream, Charset encoding, String filePath, CharHandler[] handlers) {
    try (Reader reader = new BufferedReader(new InputStreamReader(stream, encoding))) {
      read(reader, handlers);
    } catch (IOException e) {
      throw new IllegalStateException(String.format("Fail to read file '%s' with encoding '%s'", filePath, encoding), e);
    }
  }

  /**
   * Single-pass scan of the reader. A '\r' terminator is ambiguous until the next character is
   * seen ("\r\n" is one terminator, "\r" alone is one line), so the loop carries an {@code afterCR}
   * flag and defers the newLine() decision by one character.
   */
  private static void read(Reader reader, CharHandler[] handlers) throws IOException {
    char c;
    int i = reader.read();
    boolean afterCR = false;
    while (i != -1) {
      c = (char) i;
      if (afterCR) {
        for (CharHandler handler : handlers) {
          if (c == CARRIAGE_RETURN) {
            // "\r\r": the pending CR was a full terminator; this CR is pending again
            handler.newLine();
            handler.handleAll(c);
          } else if (c == LINE_FEED) {
            // "\r\n": a single terminator
            handler.handleAll(c);
            handler.newLine();
          } else {
            // "\rX": the pending CR was a full terminator, X is an ordinary character
            handler.newLine();
            handler.handleIgnoreEoL(c);
            handler.handleAll(c);
          }
        }
        afterCR = c == CARRIAGE_RETURN;
      } else if (c == LINE_FEED) {
        for (CharHandler handler : handlers) {
          handler.handleAll(c);
          handler.newLine();
        }
      } else if (c == CARRIAGE_RETURN) {
        // defer: might be the start of "\r\n"
        afterCR = true;
        for (CharHandler handler : handlers) {
          handler.handleAll(c);
        }
      } else {
        for (CharHandler handler : handlers) {
          handler.handleIgnoreEoL(c);
          handler.handleAll(c);
        }
      }
      i = reader.read();
    }
    for (CharHandler handler : handlers) {
      // a trailing lone '\r' is still a line terminator
      if (afterCR) {
        handler.newLine();
      }
      handler.eof();
    }
  }

  /** Receives the MD5 hash computed for each non-blank line of a file. */
  @FunctionalInterface
  public interface LineHashConsumer {
    void consume(int lineIdx, @Nullable byte[] hash);
  }

  /**
   * Compute a MD5 hash of each line of the file after removing of all blank chars
   */
  public static void computeLineHashesForIssueTracking(InputFile f, LineHashConsumer consumer) {
    try {
      readFile(f.inputStream(), f.charset(), f.absolutePath(), new CharHandler[] {new LineHashComputer(consumer, f.file())});
    } catch (IOException e) {
      throw new IllegalStateException("Failed to compute line hashes for " + f.absolutePath(), e);
    }
  }
}