org.sonar.scanner.scan.filesystem.ByteCharsetDetectorTest.java Source code

Java tutorial

Introduction

Here is the source code for org.sonar.scanner.scan.filesystem.ByteCharsetDetectorTest.java

Source

/*
 * SonarQube
 * Copyright (C) 2009-2017 SonarSource SA
 * mailto:info AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.scanner.scan.filesystem;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.commons.io.ByteOrderMark;
import org.junit.Before;
import org.junit.Test;
import org.sonar.scanner.scan.filesystem.CharsetValidation.Result;
import org.sonar.scanner.scan.filesystem.CharsetValidation.Validation;

/**
 * Unit tests for {@link ByteCharsetDetector}.
 *
 * <p>The {@link CharsetValidation} collaborator is mocked, so each test scripts the
 * outcome of the individual validation steps and checks which charset (if any)
 * {@code detect} reports for that combination. BOM detection is exercised directly
 * with raw byte arrays and with sample files loaded from the test classpath.
 */
public class ByteCharsetDetectorTest {
    /** Classpath directory holding the encoded sample files read by {@link #readFile(String)}. */
    private static final String RESOURCE_DIR = "org/sonar/scanner/scan/filesystem/";

    private CharsetValidation validation;
    private ByteCharsetDetector charsets;

    /** Creates a fresh mocked validator and a detector without a user-configured charset. */
    @Before
    public void setUp() {
        validation = mock(CharsetValidation.class);
        charsets = new ByteCharsetDetector(validation, null);
    }

    /**
     * Checks BOM detection on the raw UTF-16BE BOM bytes and on one sample file per
     * supported encoding.
     */
    @Test
    public void detectBOM() throws URISyntaxException, IOException {
        byte[] b = ByteOrderMark.UTF_16BE.getBytes();
        assertThat(charsets.detectBOM(b)).isEqualTo(ByteOrderMark.UTF_16BE);

        assertThat(charsets.detectBOM(readFile("UTF-8"))).isEqualTo(ByteOrderMark.UTF_8);
        assertThat(charsets.detectBOM(readFile("UTF-16BE"))).isEqualTo(ByteOrderMark.UTF_16BE);
        assertThat(charsets.detectBOM(readFile("UTF-16LE"))).isEqualTo(ByteOrderMark.UTF_16LE);
        assertThat(charsets.detectBOM(readFile("UTF-32BE"))).isEqualTo(ByteOrderMark.UTF_32BE);
        assertThat(charsets.detectBOM(readFile("UTF-32LE"))).isEqualTo(ByteOrderMark.UTF_32LE);
    }

    /**
     * Reads the whole content of the sample file {@code <fileName>.txt} from the test classpath.
     *
     * @param fileName base name of the sample file, without the {@code .txt} extension
     * @return the raw bytes of the file
     * @throws IllegalStateException if the sample file is not on the classpath
     * @throws URISyntaxException if the resource URL cannot be converted to a URI
     * @throws IOException if the file cannot be read
     */
    private byte[] readFile(String fileName) throws URISyntaxException, IOException {
        String resourceName = RESOURCE_DIR + fileName + ".txt";
        URL resource = getClass().getClassLoader().getResource(resourceName);
        if (resource == null) {
            // getResource signals "not found" with null; fail with the resource name
            // instead of an anonymous NullPointerException.
            throw new IllegalStateException("Test resource not found on classpath: " + resourceName);
        }
        Path path = Paths.get(resource.toURI());
        return Files.readAllBytes(path);
    }

    /** Builds the inconclusive validation result (MAYBE, no charset) used by several tests. */
    private static Result maybe() {
        return new Result(Validation.MAYBE, null);
    }

    /** A valid UTF-8 result from the validator is returned as the detected charset. */
    @Test
    public void tryUTF8First() {
        when(validation.isUTF8(any(byte[].class), anyBoolean()))
                .thenReturn(Result.newValid(StandardCharsets.UTF_8));
        assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.UTF_8);
    }

    /** With UTF-8 rejected, a valid UTF-16 result is returned as the detected charset. */
    @Test
    public void tryUTF16heuristics() {
        when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
        when(validation.isUTF16(any(byte[].class), anyBoolean()))
                .thenReturn(Result.newValid(StandardCharsets.UTF_16));
        when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);

        assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.UTF_16);
    }

    /**
     * UTF-8 is rejected, UTF-16 is inconclusive and no user charset is configured:
     * nothing is detected.
     */
    @Test
    public void failAll() {
        when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
        when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(maybe());

        assertThat(charsets.detect(new byte[1])).isNull();
    }

    /**
     * UTF-8 is inconclusive and no user charset is configured: nothing is detected,
     * even though the UTF-16 checks are stubbed to succeed.
     */
    @Test
    public void failAnsii() {
        when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(maybe());
        when(validation.isUTF16(any(byte[].class), anyBoolean()))
                .thenReturn(Result.newValid(StandardCharsets.UTF_16));
        when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);

        assertThat(charsets.detect(new byte[1])).isNull();
    }

    /**
     * UTF-8 is inconclusive and the user-configured ISO-8859-1 charset decodes the bytes:
     * the user charset is returned, even though the UTF-16 checks are stubbed to succeed.
     */
    @Test
    public void tryUserAnsii() {
        when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(maybe());
        when(validation.isUTF16(any(byte[].class), anyBoolean()))
                .thenReturn(Result.newValid(StandardCharsets.UTF_16));
        when(validation.isValidUTF16(any(byte[].class), anyBoolean())).thenReturn(true);
        when(validation.tryDecode(any(byte[].class), eq(StandardCharsets.ISO_8859_1))).thenReturn(true);

        charsets = new ByteCharsetDetector(validation, StandardCharsets.ISO_8859_1);
        assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.ISO_8859_1);
    }

    /**
     * UTF-8 is rejected, UTF-16 is inconclusive and the user-configured ISO-8859-1 charset
     * decodes the bytes: the user charset is returned.
     */
    @Test
    public void tryOtherUserCharset() {
        when(validation.isUTF8(any(byte[].class), anyBoolean())).thenReturn(Result.INVALID);
        when(validation.isUTF16(any(byte[].class), anyBoolean())).thenReturn(maybe());
        when(validation.tryDecode(any(byte[].class), eq(StandardCharsets.ISO_8859_1))).thenReturn(true);

        charsets = new ByteCharsetDetector(validation, StandardCharsets.ISO_8859_1);
        assertThat(charsets.detect(new byte[1])).isEqualTo(StandardCharsets.ISO_8859_1);
    }

    /** Byte sequences that are not a BOM, are too short, or are empty yield no BOM. */
    @Test
    public void invalidBOM() {
        byte[] b1 = { (byte) 0xFF, (byte) 0xFF };
        assertThat(charsets.detectBOM(b1)).isNull();

        // not enough bytes
        byte[] b2 = { (byte) 0xFE };
        assertThat(charsets.detectBOM(b2)).isNull();

        // empty
        byte[] b3 = new byte[0];
        assertThat(charsets.detectBOM(b3)).isNull();
    }
}