Java tutorial
/* * Copyright (C) 2015 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.google.cloud.dataflow.sdk.runners.worker; import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.positionFromSplitResult; import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.splitRequestAtByteOffset; import static com.google.cloud.dataflow.sdk.runners.worker.ReaderTestUtils.splitRequestAtPosition; import static com.google.cloud.dataflow.sdk.runners.worker.SourceTranslationUtils.readerProgressToCloudProgress; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import com.google.api.services.dataflow.model.ApproximateProgress; import com.google.api.services.dataflow.model.Position; import com.google.cloud.dataflow.sdk.TestUtils; import com.google.cloud.dataflow.sdk.coders.StringUtf8Coder; import com.google.cloud.dataflow.sdk.coders.TextualIntegerCoder; import com.google.cloud.dataflow.sdk.io.TextIO; import com.google.cloud.dataflow.sdk.io.TextIO.CompressionType; import com.google.cloud.dataflow.sdk.util.CoderUtils; import com.google.cloud.dataflow.sdk.util.IOChannelUtils; import com.google.cloud.dataflow.sdk.util.MimeTypes; import com.google.cloud.dataflow.sdk.util.common.worker.ExecutorTestUtils; import com.google.cloud.dataflow.sdk.util.common.worker.Reader; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; import java.nio.channels.Channels; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; import java.util.List; import java.util.zip.GZIPOutputStream; /** * Tests for TextReader. */ @RunWith(JUnit4.class) public class TextReaderTest { private static final String[] fileContent = { "First line\n", "Second line\r\n", "Third line" }; private static final long TOTAL_BYTES_COUNT; static { long sumLen = 0L; for (String s : fileContent) { sumLen += s.length(); } TOTAL_BYTES_COUNT = sumLen; } @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); @Rule public ExpectedException expectedException = ExpectedException.none(); private File initTestFile() throws IOException { File tmpFile = tmpFolder.newFile(); FileOutputStream output = new FileOutputStream(tmpFile); for (String s : fileContent) { output.write(s.getBytes()); } output.close(); return tmpFile; } @Test public void testReadEmptyFile() throws Exception { TextReader<String> textReader = new TextReader<>(tmpFolder.newFile().getPath(), true, null, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { assertFalse(iterator.hasNext()); } } @Test public void testStrippedNewlines() throws Exception { testNewlineHandling("\r", true); testNewlineHandling("\r\n", true); testNewlineHandling("\n", true); } @Test public void testStrippedNewlinesAtEndOfReadBuffer() throws Exception { boolean stripNewLines = true; StringBuilder payload = new StringBuilder(); for (int i = 0; i < TextReader.BUF_SIZE - 2; ++i) { payload.append('a'); } String[] lines = { payload.toString(), payload.toString() }; testStringPayload(lines, "\r", stripNewLines); testStringPayload(lines, "\r\n", stripNewLines); testStringPayload(lines, "\n", stripNewLines); } @Test public void testUnstrippedNewlines() throws Exception { testNewlineHandling("\r", false); testNewlineHandling("\r\n", false); testNewlineHandling("\n", false); } @Test public void testUnstrippedNewlinesAtEndOfReadBuffer() throws Exception { boolean stripNewLines = false; StringBuilder payload = new StringBuilder(); for (int i = 0; i < TextReader.BUF_SIZE - 2; ++i) { payload.append('a'); } String[] lines = { payload.toString(), payload.toString() }; testStringPayload(lines, "\r", stripNewLines); testStringPayload(lines, "\r\n", stripNewLines); testStringPayload(lines, "\n", stripNewLines); } @Test public void testStartPosition() throws Exception { File tmpFile = initTestFile(); { TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, 11L, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { assertEquals("Second line\r\n", iterator.next()); assertEquals("Third line", iterator.next()); assertFalse(iterator.hasNext()); // The first '1' in the array represents the reading of '\n' between first and // second line, to confirm that we are reading from the beginning of a record. assertEquals(Arrays.asList(1, 13, 10), observer.getActualSizes()); } } { TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, 20L, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { assertEquals("Third line", iterator.next()); assertFalse(iterator.hasNext()); // The first '5' in the array represents the reading of a portion of the second // line, which had to be read to find the beginning of the third line. assertEquals(Arrays.asList(5, 10), observer.getActualSizes()); } } { TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), true, 0L, 20L, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { assertEquals("First line", iterator.next()); assertEquals("Second line", iterator.next()); assertFalse(iterator.hasNext()); assertEquals(Arrays.asList(11, 13), observer.getActualSizes()); } } { TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), true, 1L, 20L, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { assertEquals("Second line", iterator.next()); assertFalse(iterator.hasNext()); // The first '11' in the array represents the reading of the entire first // line, which had to be read to find the beginning of the second line. assertEquals(Arrays.asList(11, 13), observer.getActualSizes()); } } } @Test public void testUtf8Handling() throws Exception { File tmpFile = tmpFolder.newFile(); FileOutputStream output = new FileOutputStream(tmpFile); // first line: \n // second line: \n output.write(new byte[] { (byte) 0xE2, (byte) 0x82, (byte) 0xAC, '\n', (byte) 0xC2, (byte) 0xA2, '\n' }); output.close(); { // 3L is after the first line if counting codepoints, but within // the first line if counting chars. So correct behavior is to return // just one line, since offsets are in chars, not codepoints. TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), true, 0L, 3L, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { assertArrayEquals("".getBytes("UTF-8"), iterator.next().getBytes("UTF-8")); assertFalse(iterator.hasNext()); assertEquals(Arrays.asList(4), observer.getActualSizes()); } } { // Starting location is mid-way into a codepoint. // Ensures we don't fail when skipping over an incomplete codepoint. TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), true, 2L, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { assertArrayEquals("".getBytes("UTF-8"), iterator.next().getBytes("UTF-8")); assertFalse(iterator.hasNext()); // The first '3' in the array represents the reading of a portion of the first // line, which had to be read to find the beginning of the second line. assertEquals(Arrays.asList(3, 3), observer.getActualSizes()); } } } private void testNewlineHandling(String separator, boolean stripNewlines) throws Exception { File tmpFile = tmpFolder.newFile(); PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); List<String> expected = Arrays.asList("", " hi there ", "bob", "", " ", "--zowie!--", ""); List<Integer> expectedSizes = new ArrayList<>(); for (String line : expected) { writer.print(line); writer.print(separator); expectedSizes.add(line.length() + separator.length()); } writer.close(); TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); List<String> actual = new ArrayList<>(); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { while (iterator.hasNext()) { actual.add(iterator.next()); } } if (stripNewlines) { assertEquals(expected, actual); } else { List<String> unstripped = new LinkedList<>(); for (String s : expected) { unstripped.add(s + separator); } assertEquals(unstripped, actual); } assertEquals(expectedSizes, observer.getActualSizes()); } private void testStringPayload(String[] lines, String separator, boolean stripNewlines) throws Exception { File tmpFile = tmpFolder.newFile(); List<String> expected = new ArrayList<>(); PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); for (String line : lines) { writer.print(line); writer.print(separator); expected.add(stripNewlines ? line : line + separator); } writer.close(); TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), stripNewlines, null, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); List<String> actual = new ArrayList<>(); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { while (iterator.hasNext()) { actual.add(iterator.next()); } } assertEquals(expected, actual); } @Test public void testCloneIteratorWithEndPositionAndFinalBytesInBuffer() throws Exception { String line = "a\n"; boolean stripNewlines = false; File tmpFile = tmpFolder.newFile(); List<String> expected = new ArrayList<>(); PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); // Write 5x the size of the buffer and 10 extra trailing bytes for (long bytesWritten = 0; bytesWritten < TextReader.BUF_SIZE * 3 + 10;) { writer.print(line); expected.add(line); bytesWritten += line.length(); } writer.close(); Long fileSize = tmpFile.length(); TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), stripNewlines, null, fileSize, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); List<String> actual = new ArrayList<>(); Reader.ReaderIterator<String> iterator = textReader.iterator(); while (iterator.hasNext()) { actual.add(iterator.next()); iterator = iterator.copy(); } assertEquals(expected, actual); } @Test public void testNonStringCoders() throws Exception { File tmpFile = tmpFolder.newFile(); PrintStream writer = new PrintStream(new FileOutputStream(tmpFile)); List<Integer> expected = TestUtils.INTS; List<Integer> expectedSizes = new ArrayList<>(); for (Integer elem : expected) { byte[] encodedElem = CoderUtils.encodeToByteArray(TextualIntegerCoder.of(), elem); writer.print(elem); writer.print("\n"); expectedSizes.add(1 + encodedElem.length); } writer.close(); TextReader<Integer> textReader = new TextReader<>(tmpFile.getPath(), true, null, null, TextualIntegerCoder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); List<Integer> actual = new ArrayList<>(); try (Reader.ReaderIterator<Integer> iterator = textReader.iterator()) { while (iterator.hasNext()) { actual.add(iterator.next()); } } assertEquals(expected, actual); assertEquals(expectedSizes, observer.getActualSizes()); } @Test public void testGetProgressNoEndOffset() throws Exception { File tmpFile = initTestFile(); TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, 0L, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { ApproximateProgress progress = readerProgressToCloudProgress(iterator.getProgress()); assertEquals(0L, progress.getPosition().getByteOffset().longValue()); iterator.next(); progress = readerProgressToCloudProgress(iterator.getProgress()); assertEquals(11L, progress.getPosition().getByteOffset().longValue()); iterator.next(); progress = readerProgressToCloudProgress(iterator.getProgress()); assertEquals(24L, progress.getPosition().getByteOffset().longValue()); // Since end position is not specified, percentComplete should be null. assertNull(progress.getPercentComplete()); iterator.next(); progress = readerProgressToCloudProgress(iterator.getProgress()); assertEquals(34L, progress.getPosition().getByteOffset().longValue()); assertFalse(iterator.hasNext()); } } @Test public void testGetProgressWithEndOffset() throws Exception { File tmpFile = initTestFile(); TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, 0L, 40L, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { ApproximateProgress progress = readerProgressToCloudProgress(iterator.getProgress()); iterator.next(); progress = readerProgressToCloudProgress(iterator.getProgress()); assertEquals(1.0f * 11 / 40, progress.getPercentComplete(), 1e-6); iterator.next(); iterator.next(); progress = readerProgressToCloudProgress(iterator.getProgress()); assertEquals(1.0f * 34 / 40, progress.getPercentComplete(), 1e-6); assertFalse(iterator.hasNext()); } } @Test public void testUpdateStopPosition() throws Exception { final long end = 10L; // in the first line final long stop = 14L; // in the middle of the second line File tmpFile = initTestFile(); // Illegal proposed stop position, no update. { TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (TextReader<String>.TextFileIterator iterator = (TextReader<String>.TextFileIterator) textReader .iterator()) { assertNull(iterator.requestDynamicSplit(splitRequestAtPosition(new Position()))); } } // Successful update. { TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (TextReader<String>.TextFileIterator iterator = (TextReader<String>.TextFileIterator) textReader .iterator()) { assertNull(iterator.getEndOffset()); assertEquals(Long.valueOf(stop), positionFromSplitResult(iterator.requestDynamicSplit(splitRequestAtByteOffset(stop))) .getByteOffset()); assertEquals(stop, iterator.getEndOffset().longValue()); assertEquals(fileContent[0], iterator.next()); assertEquals(fileContent[1], iterator.next()); assertFalse(iterator.hasNext()); assertEquals(Arrays.asList(fileContent[0].length(), fileContent[1].length()), observer.getActualSizes()); } } // Proposed stop position is before the current position, no update. { TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, null, null, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (TextReader<String>.TextFileIterator iterator = (TextReader<String>.TextFileIterator) textReader .iterator()) { assertEquals(fileContent[0], iterator.next()); assertEquals(fileContent[1], iterator.next()); assertThat(readerProgressToCloudProgress(iterator.getProgress()).getPosition().getByteOffset(), greaterThan(stop)); assertNull(iterator.requestDynamicSplit(splitRequestAtByteOffset(stop))); assertNull(iterator.getEndOffset()); assertTrue(iterator.hasNext()); assertEquals(fileContent[2], iterator.next()); assertEquals( Arrays.asList(fileContent[0].length(), fileContent[1].length(), fileContent[2].length()), observer.getActualSizes()); } } // Proposed stop position is after the current stop (end) position, no update. { TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, null, end, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); ExecutorTestUtils.TestReaderObserver observer = new ExecutorTestUtils.TestReaderObserver(textReader); try (TextReader<String>.TextFileIterator iterator = (TextReader<String>.TextFileIterator) textReader .iterator()) { assertEquals(fileContent[0], iterator.next()); assertNull(iterator.requestDynamicSplit(splitRequestAtByteOffset(stop))); assertEquals(end, iterator.getEndOffset().longValue()); assertFalse(iterator.hasNext()); assertEquals(Arrays.asList(fileContent[0].length()), observer.getActualSizes()); } } } @Test public void testUpdateStopPositionExhaustive() throws Exception { File tmpFile = initTestFile(); // Checks for every possible position in the file, that either we fail to // "updateStop" at it, or we succeed and then reading both halves together // yields the original file with no missed records or duplicates. for (long start = 0; start < TOTAL_BYTES_COUNT - 1; start++) { for (long end = start + 1; end < TOTAL_BYTES_COUNT; end++) { for (long stop = start; stop <= end; stop++) { stopPositionTestInternal(start, end, stop, tmpFile); } } } // Test with null start/end positions. for (long stop = 0L; stop < TOTAL_BYTES_COUNT; stop++) { stopPositionTestInternal(null, null, stop, tmpFile); } } private void stopPositionTestInternal(Long startOffset, Long endOffset, Long stopOffset, File tmpFile) throws Exception { String readWithoutSplit; String readWithSplit1, readWithSplit2; StringBuilder accumulatedRead = new StringBuilder(); // Read from source without split attempts. TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), false, startOffset, endOffset, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); try (TextReader<String>.TextFileIterator iterator = (TextReader<String>.TextFileIterator) textReader .iterator()) { while (iterator.hasNext()) { accumulatedRead.append(iterator.next()); } readWithoutSplit = accumulatedRead.toString(); } // Read the first half of the split. textReader = new TextReader<>(tmpFile.getPath(), false, startOffset, stopOffset, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); accumulatedRead = new StringBuilder(); try (TextReader<String>.TextFileIterator iterator = (TextReader<String>.TextFileIterator) textReader .iterator()) { while (iterator.hasNext()) { accumulatedRead.append(iterator.next()); } readWithSplit1 = accumulatedRead.toString(); } // Read the second half of the split. textReader = new TextReader<>(tmpFile.getPath(), false, stopOffset, endOffset, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); accumulatedRead = new StringBuilder(); try (TextReader<String>.TextFileIterator iterator = (TextReader<String>.TextFileIterator) textReader .iterator()) { while (iterator.hasNext()) { accumulatedRead.append(iterator.next()); } readWithSplit2 = accumulatedRead.toString(); } assertEquals(readWithoutSplit, readWithSplit1 + readWithSplit2); } private OutputStream getOutputStreamForCompressionType(OutputStream stream, CompressionType compressionType) throws IOException { switch (compressionType) { case GZIP: return new GZIPOutputStream(stream); case BZIP2: return new BZip2CompressorOutputStream(stream); case UNCOMPRESSED: case AUTO: return stream; default: fail("Unrecognized stream type"); } return stream; } private File createFileWithCompressionType(String[] lines, String filename, CompressionType compressionType) throws IOException { File tmpFile = tmpFolder.newFile(filename); PrintStream writer = new PrintStream( getOutputStreamForCompressionType(new FileOutputStream(tmpFile), compressionType)); for (String line : lines) { writer.println(line); } writer.close(); return tmpFile; } private void testCompressionTypeHelper(String[] lines, String filename, CompressionType outputCompressionType, CompressionType inputCompressionType) throws IOException { File tmpFile = createFileWithCompressionType(lines, filename, outputCompressionType); List<String> expected = new ArrayList<>(); for (String line : lines) { expected.add(line); } TextReader<String> textReader = new TextReader<>(tmpFile.getPath(), true, null, null, StringUtf8Coder.of(), inputCompressionType); List<String> actual = new ArrayList<>(); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { while (iterator.hasNext()) { actual.add(iterator.next()); } } assertEquals(expected, actual); tmpFile.delete(); } @Test public void testCompressionTypeOneFile() throws IOException { String[] contents = { "Miserable pigeon", "Vulnerable sparrow", "Brazen crow" }; // test AUTO compression type with different extensions testCompressionTypeHelper(contents, "test.gz", CompressionType.GZIP, CompressionType.AUTO); testCompressionTypeHelper(contents, "test.bz2", CompressionType.BZIP2, CompressionType.AUTO); testCompressionTypeHelper(contents, "test.txt", CompressionType.UNCOMPRESSED, CompressionType.AUTO); testCompressionTypeHelper(contents, "test", CompressionType.UNCOMPRESSED, CompressionType.AUTO); // test GZIP, BZIP2, and UNCOMPRESSED testCompressionTypeHelper(contents, "test.txt", CompressionType.GZIP, CompressionType.GZIP); testCompressionTypeHelper(contents, "test.txt", CompressionType.BZIP2, CompressionType.BZIP2); testCompressionTypeHelper(contents, "test.gz", CompressionType.UNCOMPRESSED, CompressionType.UNCOMPRESSED); } @Test public void testCompressionTypeFileGlob() throws IOException { String[][] contents = { { "Miserable pigeon", "Vulnerable sparrow", "Brazen crow" }, { "Timid osprey", "Lazy vulture" }, { "Erratic finch", "Impressible parakeet" }, }; File[] files = { createFileWithCompressionType(contents[0], "test.gz", CompressionType.GZIP), createFileWithCompressionType(contents[1], "test.bz2", CompressionType.BZIP2), createFileWithCompressionType(contents[2], "test.txt", CompressionType.UNCOMPRESSED), }; List<String> expected = new ArrayList<>(); for (String[] fileContents : contents) { for (String line : fileContents) { expected.add(line); } } String path = tmpFolder.getRoot().getPath() + System.getProperty("file.separator") + "*"; TextReader<String> textReader = new TextReader<>(path, true, null, null, StringUtf8Coder.of(), CompressionType.AUTO); List<String> actual = new ArrayList<>(); try (Reader.ReaderIterator<String> iterator = textReader.iterator()) { while (iterator.hasNext()) { actual.add(iterator.next()); } } assertThat(actual, containsInAnyOrder(expected.toArray())); for (File file : files) { file.delete(); } } @Test public void testErrorOnFileNotFound() throws Exception { expectedException.expect(FileNotFoundException.class); TextReader<String> textReader = new TextReader<>("file-not-found", true, 0L, 100L, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); textReader.iterator(); } @Test public void testErrorOnMultipleFiles() throws Exception { File file1 = tmpFolder.newFile("foo1.avro"); File file2 = tmpFolder.newFile("foo2.avro"); Channels.newOutputStream(IOChannelUtils.create(file1.getPath(), MimeTypes.BINARY)).close(); Channels.newOutputStream(IOChannelUtils.create(file2.getPath(), MimeTypes.BINARY)).close(); TextReader<String> textReader = new TextReader<>(new File(tmpFolder.getRoot(), "*").getPath(), true, 0L, 100L, StringUtf8Coder.of(), TextIO.CompressionType.UNCOMPRESSED); expectedException.expect(IllegalArgumentException.class); expectedException.expectMessage("more than 1 file matched"); textReader.iterator(); } // TODO: sharded filenames // TODO: reading from GCS }