Java tutorial
/*
 * Copyright 2017 Igor Maznitsa.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.igormaznitsa.charsniffer;

import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugins.annotations.LifecyclePhase;
import org.apache.maven.plugins.annotations.Mojo;
import org.apache.maven.plugins.annotations.Parameter;

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.HashSet;
import java.util.Set;
import javax.annotation.Nonnull;
import org.apache.commons.io.FileUtils;

/**
 * Maven mojo (goal {@code sniff}, default phase {@code PACKAGE}) which reads the
 * configured text files and checks their chars against the configured restrictions:
 * allowed char code range, allowed/prohibited alphabets, expected end-of-line
 * marker and, optionally, validity of the raw bytes as UTF-8.
 * The build fails if at least one file does not pass the checks.
 */
@Mojo(name = "sniff", defaultPhase = LifecyclePhase.PACKAGE, threadSafe = true)
public class CharSnifferMojo extends AbstractMojo {

  /**
   * Text files whose chars will be sniffed.
   */
  @Parameter(property = "files", required = true)
  private File[] files;

  /**
   * Minimal allowed char code. A negative value disables the check.
   */
  @Parameter(property = "minCharCode", required = false, defaultValue = "-1")
  private int minCharCode;

  /**
   * Maximal allowed char code. A negative value disables the check.
   */
  @Parameter(property = "maxCharCode", required = false, defaultValue = "-1")
  private int maxCharCode;

  /**
   * Char set used to decode file content.
   */
  @Parameter(property = "charSet", required = false, defaultValue = "UTF-8")
  private String charSet;

  /**
   * String of chars which are the only ones allowed to be presented in a file.
   */
  @Parameter(property = "abc", required = false)
  private String abc;

  /**
   * String of chars which are prohibited to be presented in a file.
   */
  @Parameter(property = "noAbc", required = false)
  private String noAbc;

  /**
   * Fail if a sniffed file has zero length.
   */
  @Parameter(property = "failForEmptyFile", defaultValue = "false")
  private boolean failForEmptyFile;

  /**
   * Validate UTF-8 char bytes. Allows to detect wrong UTF-8 byte chains.
   */
  @Parameter(property = "validateUtf8", defaultValue = "false")
  private boolean validateUtf8;

  /**
   * Ignore ISO control chars during ABC checking.
   */
  @Parameter(property = "ignoreAbcForISOControl", defaultValue = "true")
  private boolean ignoreAbcForISOControl;

  /**
   * Required End-Of-Line codes (CR,LF,CRLF).
   */
  @Parameter(property = "eol", required = false, defaultValue = "UNDEFINED")
  private EndOfLine eol;

  /**
   * Allow missing files.
   */
  @Parameter(property = "missingFilesAllowed", defaultValue = "false")
  private boolean missingFilesAllowed;

  /**
   * Result of processing of a single file, used only for log output.
   */
  private enum FileStatus {
    OK,
    BAD,
    MISSED
  }

  /**
   * Print a status line for a file into the log. The line contains the file name
   * padded by dots up to 64 chars (no padding for longer names) and the status name.
   * BAD is logged as error, MISSED as warning and any other status as info.
   *
   * @param file the processed file, must not be null
   * @param status the status to be printed, must not be null
   */
  private void printStatus(@Nonnull final File file, @Nonnull final FileStatus status) {
    final String fileName = file.getName();
    final int dots = 64 - fileName.length();

    final StringBuilder line = new StringBuilder(128);
    line.append(fileName);
    // A negative counter (name longer than 64 chars) just produces no padding.
    for (int i = 0; i < dots; i++) {
      line.append('.');
    }
    line.append(status.name());

    final String message = line.toString();
    switch (status) {
      case BAD:
        getLog().error(message);
        break;
      case MISSED:
        getLog().warn(message);
        break;
      default:
        getLog().info(message);
        break;
    }
  }

  /**
   * Register a wrong char. If the char has not been registered yet then it is added
   * into the set and appended to the buffer as a single-quoted value, a comma is
   * placed as separator if the buffer already contains something.
   *
   * @param c the wrong char
   * @param errorChars set of already registered wrong chars, must not be null
   * @param errorBuffer buffer accumulating the text list of wrong chars, must not be null
   */
  private static void registerErrorChar(final char c, @Nonnull final Set<Character> errorChars, @Nonnull final StringBuilder errorBuffer) {
    // Set.add returns false for an already known char, so each char is reported only once.
    if (errorChars.add(c)) {
      if (errorBuffer.length() > 0) {
        errorBuffer.append(',');
      }
      errorBuffer.append('\'').append(c).append('\'');
    }
  }

  /**
   * Check that every char of the text is inside of the char code range defined by
   * {@code config.minCode} and {@code config.maxCode}. A negative bound is ignored.
   *
   * @param text the text to be checked, must not be null
   * @param config the check configuration, must not be null
   * @param errorBuffer buffer to collect the list of detected wrong chars, must not be null
   * @return true if no char outside of the range was found, false otherwise
   */
  static boolean checkForCodes(@Nonnull final String text, @Nonnull final CheckConfig config, @Nonnull final StringBuilder errorBuffer) {
    final Set<Character> errorChars = new HashSet<Character>();

    final boolean checkMin = config.minCode >= 0;
    final boolean checkMax = config.maxCode >= 0;

    if (checkMin || checkMax) {
      for (int i = 0; i < text.length(); i++) {
        final char c = text.charAt(i);
        if (checkMin && c < config.minCode) {
          registerErrorChar(c, errorChars, errorBuffer);
        }
        if (checkMax && c > config.maxCode) {
          registerErrorChar(c, errorChars, errorBuffer);
        }
      }
    }

    return errorChars.isEmpty();
  }

  /**
   * Check chars of the text against the allowed alphabet ({@code config.abc}) and the
   * prohibited alphabet ({@code config.noAbc}). A null alphabet is ignored. If
   * {@code config.ignoreAbcForISOControl} is set then ISO control chars are skipped.
   *
   * @param text the text to be checked, must not be null
   * @param config the check configuration, must not be null
   * @param errorBuffer buffer to collect the list of detected wrong chars, must not be null
   * @return true if no wrong char was found, false otherwise
   */
  static boolean checkForAbc(@Nonnull final String text, @Nonnull final CheckConfig config, @Nonnull final StringBuilder errorBuffer) {
    final String allowed = config.abc;
    final String disallowed = config.noAbc;

    final Set<Character> errorChars = new HashSet<Character>();

    if (allowed != null || disallowed != null) {
      for (int i = 0; i < text.length(); i++) {
        final char c = text.charAt(i);

        if (config.ignoreAbcForISOControl && Character.isISOControl(c)) {
          continue;
        }

        if (allowed != null && allowed.indexOf(c) < 0) {
          registerErrorChar(c, errorChars, errorBuffer);
        }
        if (disallowed != null && disallowed.indexOf(c) >= 0) {
          registerErrorChar(c, errorChars, errorBuffer);
        }
      }
    }

    return errorChars.isEmpty();
  }

  /**
   * Check that a byte array can be decoded as UTF-8 without decoding errors.
   *
   * @param input the bytes to be checked, must not be null
   * @return true if the decoder processed the whole array without error, false otherwise
   */
  static boolean isValidUTF8(@Nonnull final byte[] input) {
    final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
    try {
      decoder.decode(ByteBuffer.wrap(input));
      return true;
    } catch (CharacterCodingException e) {
      return false;
    }
  }

  /**
   * Check the end-of-line marker of the text. Only the first marker found in the
   * text is compared with {@code config.eol}.
   *
   * @param text the text to be checked, must not be null
   * @param config the check configuration, must not be null
   * @return true if the expected EOL is UNDEFINED, or the text has no EOL marker,
   * or the first found marker is the expected one; false otherwise
   */
  static boolean checkForEOL(@Nonnull final String text, @Nonnull final CheckConfig config) {
    if (config.eol == EndOfLine.UNDEFINED) {
      return true;
    }
    final EndOfLine detected = findFirstEOL(text);
    return detected == EndOfLine.UNDEFINED || detected == config.eol;
  }

  /**
   * Find the first end-of-line marker in the text.
   *
   * @param text the text to be scanned, must not be null
   * @return CRLF, LF or CR for the first found marker, UNDEFINED if the text
   * contains no marker
   */
  @Nonnull
  static EndOfLine findFirstEOL(@Nonnull final String text) {
    char prev = ' ';
    for (int i = 0; i < text.length(); i++) {
      final char cur = text.charAt(i);
      if (cur == '\n') {
        return prev == '\r' ? EndOfLine.CRLF : EndOfLine.LF;
      }
      if (prev == '\r') {
        // A CR which is not followed by LF is a standalone CR marker.
        return EndOfLine.CR;
      }
      prev = cur;
    }
    // The loop always returns on LF, so here only a trailing CR can be a marker.
    return prev == '\r' ? EndOfLine.CR : EndOfLine.UNDEFINED;
  }

  /**
   * Read a file with the configured char set and sequentially make the char code
   * check, the ABC check, the EOL check and (if enabled) the UTF-8 validation of
   * the raw file bytes. Processing stops on the first failed check and its reason
   * is written into the debug log.
   *
   * @param file the file to be checked, must not be null
   * @param config the check configuration, must not be null
   * @return true if all checks are passed, false if any check is failed or the
   * file can't be read
   */
  private boolean checkFile(@Nonnull final File file, @Nonnull final CheckConfig config) {
    try {
      if (getLog().isDebugEnabled()) {
        getLog().debug("Sniffing file : " + file);
      }

      final String textBody = FileUtils.readFileToString(file, config.charSet);
      final StringBuilder errorMessageBuffer = new StringBuilder();

      if (!checkForCodes(textBody, config, errorMessageBuffer)) {
        if (getLog().isDebugEnabled()) {
          getLog().debug("Detected wrong chars : " + errorMessageBuffer.toString());
        }
        return false;
      }

      // Each check reports only its own findings, so the buffer is reset between checks.
      errorMessageBuffer.setLength(0);

      if (!checkForAbc(textBody, config, errorMessageBuffer)) {
        if (getLog().isDebugEnabled()) {
          getLog().debug("Detected wrong ABC chars : " + errorMessageBuffer.toString());
        }
        return false;
      }

      if (!checkForEOL(textBody, config)) {
        if (getLog().isDebugEnabled()) {
          getLog().debug("Detected wrong EOL");
        }
        return false;
      }

      // Validation needs raw bytes because the decoded text hides malformed sequences.
      if (config.validateUtf8 && !isValidUTF8(FileUtils.readFileToByteArray(file))) {
        if (getLog().isDebugEnabled()) {
          getLog().debug("File '" + file + "' contains wrong UTF-8 byte sequence");
        }
        return false;
      }

      return true;
    } catch (IOException ex) {
      getLog().error("Can't read text file : " + file, ex);
      return false;
    }
  }

  /**
   * Check all configured files and print status for each of them.
   *
   * @throws MojoExecutionException if a file is missing and missing files are not
   * allowed, or if at least one file is detected as bad
   */
  @Override
  public void execute() throws MojoExecutionException {
    final CheckConfig config = CheckConfig.build()
        .setAbc(this.abc)
        .setNoAbc(this.noAbc)
        .setCharSet(this.charSet)
        .setEol(this.eol)
        .setMinCode(this.minCharCode)
        .setMaxCode(this.maxCharCode)
        .setValidateUtf8(this.validateUtf8)
        .setIgnoreAbcForISOControl(this.ignoreAbcForISOControl)
        .build();

    int errors = 0;

    for (final File file : this.files) {
      if (!file.isFile()) {
        printStatus(file, FileStatus.MISSED);
        if (getLog().isDebugEnabled()) {
          getLog().debug("File '" + file + "' not found");
        }
        if (!this.missingFilesAllowed) {
          throw new MojoExecutionException("Can't find file : " + file);
        }
        continue;
      }

      if (this.failForEmptyFile && file.length() == 0L) {
        printStatus(file, FileStatus.BAD);
        if (getLog().isDebugEnabled()) {
          getLog().debug("File '" + file + "' has zero length");
        }
        errors++;
      } else if (checkFile(file, config)) {
        printStatus(file, FileStatus.OK);
      } else {
        printStatus(file, FileStatus.BAD);
        errors++;
      }
    }

    // Bad files are only counted in the loop so that all files get their status printed.
    if (errors > 0) {
      throw new MojoExecutionException("Detected bad files, check log");
    }
  }
}