// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.aliyun.odps.ship.upload; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.lang.reflect.Field; import java.nio.charset.Charset; import org.apache.http.util.ByteArrayBuffer; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import com.aliyun.odps.ship.common.Constants; import com.aliyun.odps.ship.common.RecordConverter; import com.aliyun.odps.ship.common.BlockInfo; /** * ? 
* */ public class BlockRecordReaderTest { static { Constants.DEFAULT_IO_BUFFER_SIZE = 1 * 1024 * 1024; } static String originalCharset; @BeforeClass public static void setToUTF8() throws Exception { originalCharset = System.getProperty("file.encoding"); setSystemDefaultCharset("UTF-8"); } @AfterClass public static void revertSystemCharset() throws Exception { setSystemDefaultCharset(originalCharset); } public static void setSystemDefaultCharset(String s) throws NoSuchFieldException, IllegalAccessException { System.setProperty("file.encoding", s); Field charset = Charset.class.getDeclaredField("defaultCharset"); charset.setAccessible(true); charset.set(null, null); } /** * ,? * */ @Test public void testReadLineOneline() throws Exception { /** * file content 123,abc 234,bbb * */ BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_fd_end.txt"), 0L, 45L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("split length not equal", 10, firstLine.length); // 123||abc,,,234||bbb,,,333||ccc,,, assertEquals("not equal", "123||abc", new String(firstLine[0], "UTF-8")); assertEquals("not equal", "", new String(firstLine[1])); assertEquals("not equal", "", new String(firstLine[2])); assertEquals("not equal", "333||ccc", new String(firstLine[6])); assertEquals("not equal", "", new String(firstLine[9])); } /** * ?, * */ @Test public void testReadLineMoreLine() throws Exception { /** * file content 123,abc 234,bbb * */ BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 0L, 15L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0])); assertEquals("not equal", "abc", new String(firstLine[1])); byte[] line2 = 
reader.readLine(); byte[][] nextLine = reader.splitLine(line2); assertEquals("not equal", "234", new String(nextLine[0])); assertEquals("not equal", "bbb", new String(nextLine[1])); } /** * ???utf8 * */ @Test public void testReadLineChinese() throws Exception { /** * file content 123,abc 234,bbb * */ BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split_chinese.txt"), 0L, 27L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0], "utf8")); assertEquals("not equal", "abc", new String(firstLine[1], "utf8")); byte[] line2 = reader.readLine(); byte[][] nextLine = reader.splitLine(line2); assertEquals("not equal", "234", new String(nextLine[0], "utf8")); assertEquals("not equal", "bbb", new String(nextLine[1], "utf8")); } /** * * */ @Test public void testReadLine_more_char_split() throws Exception { // file content : // 123||abc,,,234||bbb,,,333||ccc,,, BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split.txt"), 0L, 33L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0])); assertEquals("not equal", "abc", new String(firstLine[1])); byte[] line2 = reader.readLine(); byte[][] nextLine = reader.splitLine(line2); assertEquals("not equal", "234", new String(nextLine[0])); assertEquals("not equal", "bbb", new String(nextLine[1])); byte[] line3 = reader.readLine(); byte[][] tLine = reader.splitLine(line3); assertEquals("not equal", "333", new String(tLine[0])); assertEquals("not equal", "ccc", new String(tLine[1])); } /** * ? 
* */ @Test public void testReadLineMoreCharChinese() throws Exception { // file content // 123||abc,,,234||bbb,,,333||ccc,,, BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese.txt"), 0L, 45L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0], "utf8")); assertEquals("not equal", "abc", new String(firstLine[1], "utf8")); byte[] line2 = reader.readLine(); byte[][] nextLine = reader.splitLine(line2); assertEquals("not equal", "234", new String(nextLine[0], "utf8")); assertEquals("not equal", "bbb", new String(nextLine[1], "utf8")); byte[] line3 = reader.readLine(); byte[][] tLine = reader.splitLine(line3); assertEquals("not equal", "333", new String(tLine[0], "utf8")); assertEquals("not equal", "ccc", new String(tLine[1], "utf8")); } /** * * */ @Test public void testReadLineChineseSplit() throws Exception { // file content // 123abc234bbb333ccc BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_chinese_spliter.txt"), 0L, 102L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "", "", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0], "utf8")); assertEquals("not equal", "abc", new String(firstLine[1], "utf8")); byte[] line2 = reader.readLine(); byte[][] nextLine = reader.splitLine(line2); assertEquals("not equal", "234", new String(nextLine[0], "utf8")); assertEquals("not equal", "bbb", new String(nextLine[1], "utf8")); byte[] line3 = reader.readLine(); byte[][] tLine = reader.splitLine(line3); assertEquals("not equal", "333", new String(tLine[0], "utf8")); assertEquals("not equal", "ccc", new String(tLine[1], "utf8")); } /** * ?bom * */ @Test public void testReadLineChineseBom() throws Exception { // file 
content // 123||abc,,,234||bbb,,,333||ccc,,, BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese_bom.txt"), 0L, 54L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0], "utf8")); assertEquals("not equal", "abc", new String(firstLine[1], "utf8")); byte[] line2 = reader.readLine(); byte[][] nextLine = reader.splitLine(line2); assertEquals("not equal", "234", new String(nextLine[0], "utf8")); assertEquals("not equal", "bbb", new String(nextLine[1], "utf8")); byte[] line3 = reader.readLine(); byte[][] tLine = reader.splitLine(line3); assertEquals("not equal", "333", new String(tLine[0], "utf8")); assertEquals("not equal", "ccc", new String(tLine[1], "utf8")); } /** * ?AA? * */ @Test public void testFieldDelimiterA() throws Exception { /** * file content 123||abc||||234||bbb||||333||ccc| * */ BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split_A.txt"), 0L, 15L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "A", "\n", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0], "utf8")); assertEquals("not equal", "abc", new String(firstLine[1], "utf8")); line = reader.readLine(); byte[][] nextLine = reader.splitLine(line); assertEquals("not equal", "234", new String(nextLine[0], "utf8")); assertEquals("not equal", "bbb", new String(nextLine[1], "utf8")); reader = new BlockRecordReader(blockInfo, "\u0041", "\n", false); line = reader.readLine(); firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0], "utf8")); assertEquals("not equal", "abc", new String(firstLine[1], "utf8")); line = reader.readLine(); nextLine = reader.splitLine(line); assertEquals("not equal", "234", new 
String(nextLine[0], "utf8")); assertEquals("not equal", "bbb", new String(nextLine[1], "utf8")); } /** * split? * */ @Test public void testSpliter() throws Exception { /** * file content 123||abc||||234||bbb||||333||ccc| * */ BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split_A.txt"), 0L, 15L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "[", "\n", false); ByteArrayBuffer bBuffer = new ByteArrayBuffer(100); byte[][] l = reader.splitLine("123[abc[456".getBytes()); assertEquals("size not equal", 3, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); assertEquals("abc not equal", "abc", new String(l[1], "utf8")); assertEquals("456 not equal", "456", new String(l[2], "utf8")); l = reader.splitLine("123[[abc[456[".getBytes()); assertEquals("size not equal", 5, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); assertEquals("null not equal", "", new String(l[1], "utf8")); assertEquals("abc not equal", "abc", new String(l[2], "utf8")); assertEquals("456 not equal", "456", new String(l[3], "utf8")); assertEquals("null not equal", "", new String(l[4], "utf8")); l = reader.splitLine("123[[abc[456[[[[".getBytes()); assertEquals("size not equal", 8, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); assertEquals("null not equal", "", new String(l[1], "utf8")); assertEquals("abc not equal", "abc", new String(l[2], "utf8")); assertEquals("456 not equal", "456", new String(l[3], "utf8")); assertEquals("null not equal", "", new String(l[4], "utf8")); assertEquals("null not equal", "", new String(l[5], "utf8")); assertEquals("null not equal", "", new String(l[6], "utf8")); assertEquals("null not equal", "", new String(l[7], "utf8")); reader = new BlockRecordReader(blockInfo, ",", "\n", false); l = reader.splitLine("123,abc,456".getBytes()); assertEquals("size not equal", 3, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); 
assertEquals("abc not equal", "abc", new String(l[1], "utf8")); assertEquals("456 not equal", "456", new String(l[2], "utf8")); l = reader.splitLine("123,,abc,456,".getBytes()); assertEquals("size not equal", 5, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); assertEquals("null not equal", "", new String(l[1], "utf8")); assertEquals("abc not equal", "abc", new String(l[2], "utf8")); assertEquals("456 not equal", "456", new String(l[3], "utf8")); assertEquals("null not equal", "", new String(l[4], "utf8")); reader = new BlockRecordReader(blockInfo, ",", "\n", false); l = reader.splitLine("123,abc,456".getBytes()); assertEquals("size not equal", 3, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); assertEquals("abc not equal", "abc", new String(l[1], "utf8")); assertEquals("456 not equal", "456", new String(l[2], "utf8")); l = reader.splitLine("123,,abc,456,".getBytes()); assertEquals("size not equal", 5, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); assertEquals("null not equal", "", new String(l[1], "utf8")); assertEquals("abc not equal", "abc", new String(l[2], "utf8")); assertEquals("456 not equal", "456", new String(l[3], "utf8")); assertEquals("null not equal", "", new String(l[4], "utf8")); reader = new BlockRecordReader(blockInfo, "99", "\n", false); l = reader.splitLine("12399abc99456".getBytes()); assertEquals("size not equal", 3, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); assertEquals("abc not equal", "abc", new String(l[1], "utf8")); assertEquals("456 not equal", "456", new String(l[2], "utf8")); l = reader.splitLine("1239999abc9945699".getBytes()); assertEquals("size not equal", 5, l.length); assertEquals("123 not equal", "123", new String(l[0], "utf8")); assertEquals("null not equal", "", new String(l[1], "utf8")); assertEquals("abc not equal", "abc", new String(l[2], "utf8")); assertEquals("456 not equal", "456", new String(l[3], "utf8")); 
assertEquals("null not equal", "", new String(l[4], "utf8")); } /** * skip??. bom?bom * */ @Test public void testSkipAndRead() throws Exception { //pos is count by bytes BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese_bom.txt"), 34L, 20L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[] line = reader.readLine(); byte[][] tLine = reader.splitLine(line); assertEquals("not equal", "333", new String(tLine[0], "utf8")); assertEquals("not equal", "ccc", new String(tLine[1], "utf8")); BufferedReader inr2 = new BufferedReader( new FileReader("src/test/resources/file/reader/more_char_split_chinese.txt"), 1 * 1024 * 1024); BlockInfo blockInfo2 = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese.txt"), 34L, 20L); BlockRecordReader reader2 = new BlockRecordReader(blockInfo2, "||", ",,,", false); line = reader2.readLine(); assertEquals("not null", line, null); } /** * ???gbk * */ @Test public void testReadLineMoreCharChineseWithGBK() throws Exception { // file content // 123||abc,,,234||bbb,,,333||ccc,,, BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese_gbk.txt"), 0L, 43L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0], "gbk")); assertEquals("not equal", "abc", new String(firstLine[1], "gbk")); byte[] line2 = reader.readLine(); byte[][] nextLine = reader.splitLine(line2); assertEquals("not equal", "234", new String(nextLine[0], "gbk")); assertEquals("not equal", "bbb", new String(nextLine[1], "gbk")); byte[] line3 = reader.readLine(); byte[][] tLine = reader.splitLine(line3); assertEquals("not equal", "333", new String(tLine[0], "gbk")); assertEquals("not equal", "ccc", new String(tLine[1], "gbk")); } /** * ? 
* */ @Test public void testRecordDelimiterIncludeFieldDelimiter() throws Exception { /** * file content 123||abc||||234||bbb||||333||ccc| * */ BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_mix.txt"), 0L, 39L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", "||||", false); byte[] line = reader.readLine(); byte[][] firstLine = reader.splitLine(line); assertEquals("not equal", "123", new String(firstLine[0], "utf8")); assertEquals("not equal", "abc", new String(firstLine[1], "utf8")); byte[] line2 = reader.readLine(); byte[][] nextLine = reader.splitLine(line2); assertEquals("not equal", "234", new String(nextLine[0], "utf8")); assertEquals("not equal", "bbb", new String(nextLine[1], "utf8")); byte[] line3 = reader.readLine(); byte[][] tLine = reader.splitLine(line3); assertEquals("not equal", "333", new String(tLine[0], "utf8")); assertEquals("not equal", "ccc|", new String(tLine[1], "utf8")); } /** * ?block **/ @Test public void testReadMultiBlocksNormalCase() throws Exception { // block split exactly at line split { //first block will read 2 line BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 0L, 8L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "123", new String(firstLine[0])); assertEquals("not equal", "abc", new String(firstLine[1])); byte[][] secondLine = reader.readTextRecord(); assertEquals("not equal", "234", new String(secondLine[0])); assertEquals("not equal", "bbb", new String(secondLine[1])); //second block read nothing BlockInfo blockInfo2 = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 8L, 7L); reader = new BlockRecordReader(blockInfo2, ",", "\n", false); secondLine = reader.readTextRecord(); assertNull("not null", secondLine); } // block split after line split { //first block will read 2 
line BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 0L, 9L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "123", new String(firstLine[0])); assertEquals("not equal", "abc", new String(firstLine[1])); byte[][] secondLine = reader.readTextRecord(); assertEquals("not equal", "234", new String(secondLine[0])); assertEquals("not equal", "bbb", new String(secondLine[1])); //second block read nothing BlockInfo blockInfo2 = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 9L, 6L); reader = new BlockRecordReader(blockInfo2, ",", "\n", false); secondLine = reader.readTextRecord(); assertNull("not null", secondLine); } // block split before line split { //first block read first line BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 0L, 7L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "123", new String(firstLine[0])); assertEquals("not equal", "abc", new String(firstLine[1])); firstLine = reader.readTextRecord(); assertNull("not null", firstLine); //second block read second line BlockInfo blockInfo2 = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 7L, 8L); reader = new BlockRecordReader(blockInfo2, ",", "\n", false); byte[][] secondLine = reader.readTextRecord(); assertEquals("not equal", "234", new String(secondLine[0])); assertEquals("not equal", "bbb", new String(secondLine[1])); secondLine = reader.readTextRecord(); assertNull("not null", secondLine); } } /** * ?block, **/ @Test public void testReadMultiBlocksAbnormalCase() throws Exception { //test startPos is out of boundary, { //first block will read 2 line BlockInfo blockInfo = new BlockInfo(1L, new 
File("src/test/resources/file/reader/one_char_split.txt"), 100L, 8L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", false); byte[][] secondLine = reader.readTextRecord(); assertNull("not null", secondLine); } // test offset is out of boundary { //first block will read 2 line BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 0L, 100L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "123", new String(firstLine[0])); assertEquals("not equal", "abc", new String(firstLine[1])); byte[][] secondLine = reader.readTextRecord(); assertEquals("not equal", "234", new String(secondLine[0])); assertEquals("not equal", "bbb", new String(secondLine[1])); //second block read nothing secondLine = reader.readTextRecord(); assertNull("not null", secondLine); } } /** * ?Block * */ @Test public void testReadMultiBlocksWithMoreCharSplit() throws Exception { // file content : // 123||abc,,,234||bbb,,,333||ccc,,, // first block read 2 record { BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split.txt"), 0L, 16L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "123", new String(firstLine[0])); assertEquals("not equal", "abc", new String(firstLine[1])); byte[][] nextLine = reader.readTextRecord(); assertEquals("not equal", "234", new String(nextLine[0])); assertEquals("not equal", "bbb", new String(nextLine[1])); byte[][] tLine = reader.readTextRecord(); assertNull("not null", tLine); } // second block read 1 record { BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split.txt"), 16L, 16L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[][] firstLine = reader.readTextRecord(); 
assertEquals("not equal", "333", new String(firstLine[0])); assertEquals("not equal", "ccc", new String(firstLine[1])); } /** * block? * ??20-22block21? * block??2block? **/ // first block read 2 record { BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split.txt"), 0L, 21L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "123", new String(firstLine[0])); assertEquals("not equal", "abc", new String(firstLine[1])); byte[][] nextLine = reader.readTextRecord(); assertEquals("not equal", "234", new String(nextLine[0])); assertEquals("not equal", "bbb", new String(nextLine[1])); byte[][] tLine = reader.readTextRecord(); assertNull("not null", tLine); } // second block read 1 record { BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split.txt"), 21L, 16L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "333", new String(firstLine[0])); assertEquals("not equal", "ccc", new String(firstLine[1])); } } /** * bom?block * */ @Test public void testReadMultiBlocksWithBom() throws Exception { // file content // 123||abc,,,234||bbb,,,333||ccc,,, // first block read 2 lines { BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese_bom.txt"), 0L, 27L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "123", new String(firstLine[0], "utf8")); assertEquals("not equal", "abc", new String(firstLine[1], "utf8")); byte[][] nextLine = reader.readTextRecord(); assertEquals("not equal", "234", new String(nextLine[0], "utf8")); assertEquals("not equal", "bbb", new String(nextLine[1], "utf8")); byte[][] tLine = reader.readTextRecord(); assertNull("not 
null", tLine); } // second block read 1 line { BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese_bom.txt"), 27L, 54L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", ",,,", false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "333", new String(firstLine[0], "utf8")); assertEquals("not equal", "ccc", new String(firstLine[1], "utf8")); } } /** * ignore header, * */ @Test public void testReadIgnoreHeader() throws Exception { /** * file content 123,abc 234,bbb, first line will ignore * */ BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/one_char_split.txt"), 0L, 15L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",", "\n", true); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "234", new String(firstLine[0])); assertEquals("not equal", "bbb", new String(firstLine[1])); byte[][] secondLine = reader.readTextRecord(); assertNull("not null", secondLine); } /** * bom? 
* */ @Test public void testRDConflictWithBom() throws Exception { // file content // bom head is consumed by bom detected, only 1 line in block { BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese_bom.txt"), 0L, 54L); BlockRecordReader reader = new BlockRecordReader(blockInfo, ",,,,", Character.toString((char) 0xFF), false); byte[][] firstLine = reader.readTextRecord(); assertEquals("not equal", "123||abc,,,234||bbb,,,333||ccc,,,", new String(firstLine[0], "utf8")); byte[][] tLine = reader.readTextRecord(); assertNull("not null", tLine); } // ignore first line, no record read in this case { BlockInfo blockInfo = new BlockInfo(1L, new File("src/test/resources/file/reader/more_char_split_chinese_bom.txt"), 0L, 54L); BlockRecordReader reader = new BlockRecordReader(blockInfo, "||", Character.toString((char) 0xFF), true); byte[][] firstLine = reader.readTextRecord(); assertNull("not null", firstLine); } } }