// Java tutorial
/* * SonarQube * Copyright (C) 2009-2017 SonarSource SA * mailto:info AT sonarsource DOT com * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 3 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package org.sonar.api.batch.fs.internal; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import javax.annotation.CheckForNull; import javax.annotation.Nullable; import org.apache.commons.codec.binary.Hex; import org.apache.commons.codec.digest.DigestUtils; import org.sonar.api.CoreProperties; import org.sonar.api.batch.ScannerSide; import org.sonar.api.batch.fs.InputFile; import org.sonar.api.utils.log.Logger; import org.sonar.api.utils.log.Loggers; /** * Computes hash of files. Ends of Lines are ignored, so files with * same content but different EOL encoding have the same hash. 
*/
@ScannerSide
public class FileMetadata {

  private static final Logger LOG = Loggers.get(FileMetadata.class);

  private static final char LINE_FEED = '\n';
  private static final char CARRIAGE_RETURN = '\r';

  /**
   * Callback invoked by {@link #readFile(InputStream, Charset, String, CharHandler[])} for each
   * character and line boundary of the stream being read. Subclasses override only the events
   * they care about; all default implementations are no-ops.
   */
  public abstract static class CharHandler {

    /** Called for every character of the stream, including end-of-line characters. */
    protected void handleAll(char c) {
    }

    /** Called for every character except end-of-line characters ('\n' and '\r'). */
    protected void handleIgnoreEoL(char c) {
    }

    /** Called once per line terminator, whatever its style ("\n", "\r" or "\r\n"). */
    protected void newLine() {
    }

    /** Called exactly once, after the last character of the stream has been handled. */
    protected void eof() {
    }
  }

  /**
   * Counts total lines and non-blank lines, and warns (at most once per file) when the Unicode
   * replacement character U+FFFD is seen — a sign the file was decoded with the wrong encoding.
   */
  private static class LineCounter extends CharHandler {
    private int lines = 1;
    private int nonBlankLines = 0;
    private boolean blankLine = true;
    private boolean alreadyLoggedInvalidCharacter = false;
    private final String filePath;
    private final Charset encoding;

    LineCounter(String filePath, Charset encoding) {
      this.filePath = filePath;
      this.encoding = encoding;
    }

    @Override
    protected void handleAll(char c) {
      // '\ufffd' is inserted by the decoder in place of malformed/unmappable input
      if (!alreadyLoggedInvalidCharacter && c == '\ufffd') {
        LOG.warn(
          "Invalid character encountered in file {} at line {} for encoding {}. Please fix file content or configure the encoding to be used using property '{}'.",
          filePath, lines, encoding, CoreProperties.ENCODING_PROPERTY);
        alreadyLoggedInvalidCharacter = true;
      }
    }

    @Override
    protected void newLine() {
      lines++;
      if (!blankLine) {
        nonBlankLines++;
      }
      blankLine = true;
    }

    @Override
    protected void handleIgnoreEoL(char c) {
      if (!Character.isWhitespace(c)) {
        blankLine = false;
      }
    }

    @Override
    protected void eof() {
      // the last line has no trailing terminator, so account for it here
      if (!blankLine) {
        nonBlankLines++;
      }
    }

    public int lines() {
      return lines;
    }

    public int nonBlankLines() {
      return nonBlankLines;
    }
  }

  /**
   * Computes a single MD5 hash of the whole file content, normalizing every line terminator to
   * '\n' so that files differing only by EOL style produce the same hash.
   */
  private static class FileHashComputer extends CharHandler {
    private final MessageDigest globalMd5Digest = DigestUtils.getMd5Digest();
    private final StringBuilder sb = new StringBuilder();
    private final CharsetEncoder encoder;
    private final String filePath;

    public FileHashComputer(String filePath) {
      // replace malformed/unmappable characters instead of failing, mirroring the decoder side
      encoder = StandardCharsets.UTF_8.newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
      this.filePath = filePath;
    }

    @Override
    protected void handleIgnoreEoL(char c) {
      sb.append(c);
    }

    @Override
    protected void newLine() {
      // normalize any EOL style to a single LINE_FEED before hashing
      sb.append(LINE_FEED);
      processBuffer();
      sb.setLength(0);
    }

    @Override
    protected void eof() {
      if (sb.length() > 0) {
        processBuffer();
      }
    }

    private void processBuffer() {
      try {
        if (sb.length() > 0) {
          ByteBuffer encoded = encoder.encode(CharBuffer.wrap(sb));
          globalMd5Digest.update(encoded.array(), 0, encoded.limit());
        }
      } catch (CharacterCodingException e) {
        throw new IllegalStateException("Error encoding line hash in file: " + filePath, e);
      }
    }

    /**
     * @return the hex-encoded MD5 hash of the normalized content; never null.
     *         (The previous {@code @CheckForNull} annotation was misleading: this method
     *         unconditionally returns the result of {@code Hex.encodeHexString}.)
     */
    public String getHash() {
      return Hex.encodeHexString(globalMd5Digest.digest());
    }
  }

  /**
   * Computes an MD5 hash of each line (all whitespace stripped) and feeds it to a
   * {@link LineHashConsumer}. Blank lines produce no callback because {@link #processBuffer()}
   * skips empty buffers.
   */
  private static class LineHashComputer extends CharHandler {
    private final MessageDigest lineMd5Digest = DigestUtils.getMd5Digest();
    private final CharsetEncoder encoder;
    private final StringBuilder sb = new StringBuilder();
    private final LineHashConsumer consumer;
    private final File file;
    private int line = 1;

    public LineHashComputer(LineHashConsumer consumer, File f) {
      this.consumer = consumer;
      this.file = f;
      this.encoder = StandardCharsets.UTF_8.newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
    }

    @Override
    protected void handleIgnoreEoL(char c) {
      // whitespace is stripped so line hashes stay stable across re-indentation
      if (!Character.isWhitespace(c)) {
        sb.append(c);
      }
    }

    @Override
    protected void newLine() {
      processBuffer();
      sb.setLength(0);
      line++;
    }

    @Override
    protected void eof() {
      // 'line' starts at 1 and only increments, so the former guard "this.line > 0" was
      // always true; flush the final (terminator-less) line unconditionally.
      processBuffer();
    }

    private void processBuffer() {
      try {
        if (sb.length() > 0) {
          ByteBuffer encoded = encoder.encode(CharBuffer.wrap(sb));
          lineMd5Digest.update(encoded.array(), 0, encoded.limit());
          consumer.consume(line, lineMd5Digest.digest());
        }
      } catch (CharacterCodingException e) {
        throw new IllegalStateException("Error encoding line hash in file: " + file.getAbsolutePath(), e);
      }
    }
  }

  /**
   * Records the character offset at which each line starts, plus the offset just past the last
   * character of the file. Offsets are tracked as long and range-checked before narrowing to int.
   */
  private static class LineOffsetCounter extends CharHandler {
    private long currentOriginalOffset = 0;
    private final IntArrayList originalLineOffsets = new IntArrayList();
    private long lastValidOffset = 0;

    public LineOffsetCounter() {
      // the first line always starts at offset 0
      originalLineOffsets.add(0);
    }

    @Override
    protected void handleAll(char c) {
      currentOriginalOffset++;
    }

    @Override
    protected void newLine() {
      if (currentOriginalOffset > Integer.MAX_VALUE) {
        throw new IllegalStateException("File is too big: " + currentOriginalOffset);
      }
      originalLineOffsets.add((int) currentOriginalOffset);
    }

    @Override
    protected void eof() {
      lastValidOffset = currentOriginalOffset;
    }

    public int[] getOriginalLineOffsets() {
      return originalLineOffsets.trimAndGet();
    }

    public int getLastValidOffset() {
      if (lastValidOffset > Integer.MAX_VALUE) {
        throw new IllegalStateException("File is too big: " + lastValidOffset);
      }
      return (int) lastValidOffset;
    }
  }

  /**
   * Compute hash of a file ignoring line ends differences.
   * Maximum performance is needed.
   *
   * @param stream       content to analyze; closed by this method
   * @param encoding     charset used to decode the stream
   * @param filePath     path used in log/error messages only
   * @param otherHandler optional extra handler notified alongside the built-in ones
   * @return line counts, file hash and line offsets for the stream
   */
  public Metadata readMetadata(InputStream stream, Charset encoding, String filePath, @Nullable CharHandler otherHandler) {
    LineCounter lineCounter = new LineCounter(filePath, encoding);
    FileHashComputer fileHashComputer = new FileHashComputer(filePath);
    LineOffsetCounter lineOffsetCounter = new LineOffsetCounter();
    if (otherHandler != null) {
      CharHandler[] handlers = {lineCounter, fileHashComputer, lineOffsetCounter, otherHandler};
      readFile(stream, encoding, filePath, handlers);
    } else {
      CharHandler[] handlers = {lineCounter, fileHashComputer, lineOffsetCounter};
      readFile(stream, encoding, filePath, handlers);
    }
    return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(),
      lineOffsetCounter.getOriginalLineOffsets(), lineOffsetCounter.getLastValidOffset());
  }

  /** Convenience overload of {@link #readMetadata(InputStream, Charset, String, CharHandler)} with no extra handler. */
  public Metadata readMetadata(InputStream stream, Charset encoding, String filePath) {
    return readMetadata(stream, encoding, filePath, null);
  }

  /**
   * For testing purpose
   */
  public Metadata readMetadata(Reader reader) {
    LineCounter lineCounter = new LineCounter("fromString", StandardCharsets.UTF_16);
    FileHashComputer fileHashComputer = new FileHashComputer("fromString");
    LineOffsetCounter lineOffsetCounter = new LineOffsetCounter();
    CharHandler[] handlers = {lineCounter, fileHashComputer, lineOffsetCounter};
    try {
      read(reader, handlers);
    } catch (IOException e) {
      throw new IllegalStateException("Should never occur", e);
    }
    return new Metadata(lineCounter.lines(), lineCounter.nonBlankLines(), fileHashComputer.getHash(),
      lineOffsetCounter.getOriginalLineOffsets(), lineOffsetCounter.getLastValidOffset());
  }

  /**
   * Decodes {@code stream} with {@code encoding} and drives the given handlers over its content.
   * The stream is closed on return.
   *
   * @throws IllegalStateException wrapping any {@link IOException}, with the file path and encoding in the message
   */
  public static void readFile(InputStream stream, Charset encoding, String filePath, CharHandler[] handlers) {
    try (Reader reader = new BufferedReader(new InputStreamReader(stream, encoding))) {
      read(reader, handlers);
    } catch (IOException e) {
      throw new IllegalStateException(String.format("Fail to read file '%s' with encoding '%s'", filePath, encoding), e);
    }
  }

  /**
   * Single-pass scan of the reader. A '\r' terminator is ambiguous until the next character is
   * seen ("\r\n" is one terminator, "\r" alone is one line), so the loop carries an {@code afterCR}
   * flag and defers the newLine() decision by one character.
   */
  private static void read(Reader reader, CharHandler[] handlers) throws IOException {
    char c;
    int i = reader.read();
    boolean afterCR = false;
    while (i != -1) {
      c = (char) i;
      if (afterCR) {
        for (CharHandler handler : handlers) {
          if (c == CARRIAGE_RETURN) {
            // "\r\r": the pending CR was a full terminator; this CR is pending again
            handler.newLine();
            handler.handleAll(c);
          } else if (c == LINE_FEED) {
            // "\r\n": a single terminator
            handler.handleAll(c);
            handler.newLine();
          } else {
            // "\rX": the pending CR was a full terminator, X is an ordinary character
            handler.newLine();
            handler.handleIgnoreEoL(c);
            handler.handleAll(c);
          }
        }
        afterCR = c == CARRIAGE_RETURN;
      } else if (c == LINE_FEED) {
        for (CharHandler handler : handlers) {
          handler.handleAll(c);
          handler.newLine();
        }
      } else if (c == CARRIAGE_RETURN) {
        // defer: might be the start of "\r\n"
        afterCR = true;
        for (CharHandler handler : handlers) {
          handler.handleAll(c);
        }
      } else {
        for (CharHandler handler : handlers) {
          handler.handleIgnoreEoL(c);
          handler.handleAll(c);
        }
      }
      i = reader.read();
    }
    for (CharHandler handler : handlers) {
      // a trailing lone '\r' is still a line terminator
      if (afterCR) {
        handler.newLine();
      }
      handler.eof();
    }
  }

  /** Receives the MD5 hash computed for each non-blank line of a file. */
  @FunctionalInterface
  public interface LineHashConsumer {
    void consume(int lineIdx, @Nullable byte[] hash);
  }

  /**
   * Compute a MD5 hash of each line of the file after removing of all blank chars
   */
  public static void computeLineHashesForIssueTracking(InputFile f, LineHashConsumer consumer) {
    try {
      readFile(f.inputStream(), f.charset(), f.absolutePath(), new CharHandler[] {new LineHashComputer(consumer, f.file())});
    } catch (IOException e) {
      throw new IllegalStateException("Failed to compute line hashes for " + f.absolutePath(), e);
    }
  }
}