org.apache.flink.api.common.io.GenericCsvInputFormatTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.flink.api.common.io.GenericCsvInputFormatTest.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.common.io;

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;
import org.apache.flink.types.DoubleValue;
import org.apache.flink.types.IntValue;
import org.apache.flink.types.LongValue;
import org.apache.flink.types.StringValue;
import org.apache.flink.types.Value;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Arrays;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.GZIPOutputStream;

import static org.apache.flink.api.common.io.DelimitedInputFormatTest.createTempFile;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

public class GenericCsvInputFormatTest {

    private TestCsvInputFormat format;

    // --------------------------------------------------------------------------------------------

    /**
     * Creates a fresh {@link TestCsvInputFormat} before each test and gives it a placeholder
     * file path. Tests that actually read data open their own input split on the format.
     */
    @Before
    public void setup() {
        this.format = new TestCsvInputFormat();
        this.format.setFilePath("file:///some/file/that/will/not/be/read");
    }

    /**
     * Closes the format under test after each test, if one was created.
     *
     * @throws Exception if closing the format fails
     */
    @After
    public void setdown() throws Exception {
        if (format == null) {
            // nothing was set up, so there is nothing to release
            return;
        }
        format.close();
    }

    /**
     * Configures a six-element type array in which three entries are {@code null} and checks
     * that the format reports three non-null fields, six fields in total, and hands back an
     * array equal to the one that was configured.
     */
    @Test
    public void testSparseFieldArray() {

        @SuppressWarnings("unchecked")
        final Class<? extends Value>[] sparseTypes = new Class[] {
                IntValue.class, null, null, StringValue.class, null, DoubleValue.class };

        format.setFieldTypesGeneric(sparseTypes);

        final int nonNullFields = format.getNumberOfNonNullFields();
        assertEquals(3, nonNullFields);

        final int totalFields = format.getNumberOfFieldsTotal();
        assertEquals(6, totalFields);

        final Object[] reportedTypes = format.getGenericFieldTypes();
        assertTrue(Arrays.equals(sparseTypes, reportedTypes));
    }

    /**
     * Reads two '|'-delimited lines with all five fields configured as {@link IntValue} and
     * checks every parsed value. The second line ends with a trailing delimiter, the first one
     * does not.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testReadNoPosAll() throws Exception {
        final String fileContent = "111|222|333|444|555\n666|777|888|999|000|";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class,
                IntValue.class);

        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(5);

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(111, ((IntValue) values[0]).getValue());
        assertEquals(222, ((IntValue) values[1]).getValue());
        assertEquals(333, ((IntValue) values[2]).getValue());
        assertEquals(444, ((IntValue) values[3]).getValue());
        assertEquals(555, ((IntValue) values[4]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(666, ((IntValue) values[0]).getValue());
        assertEquals(777, ((IntValue) values[1]).getValue());
        assertEquals(888, ((IntValue) values[2]).getValue());
        assertEquals(999, ((IntValue) values[3]).getValue());
        // the field text is "000", which parses to zero
        assertEquals(0, ((IntValue) values[4]).getValue());

        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    }

    /**
     * Same scenario as reading all five integer fields from two '|'-delimited lines, but the
     * input file is produced by {@code createTempDeflateFile}, i.e. written through a
     * {@code DeflaterOutputStream} into a file ending in {@code .deflate}.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testReadNoPosAllDeflate() throws Exception {
        final String fileContent = "111|222|333|444|555\n666|777|888|999|000|";
        final FileInputSplit split = createTempDeflateFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class,
                IntValue.class);

        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(5);

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(111, ((IntValue) values[0]).getValue());
        assertEquals(222, ((IntValue) values[1]).getValue());
        assertEquals(333, ((IntValue) values[2]).getValue());
        assertEquals(444, ((IntValue) values[3]).getValue());
        assertEquals(555, ((IntValue) values[4]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(666, ((IntValue) values[0]).getValue());
        assertEquals(777, ((IntValue) values[1]).getValue());
        assertEquals(888, ((IntValue) values[2]).getValue());
        assertEquals(999, ((IntValue) values[3]).getValue());
        // the field text is "000", which parses to zero
        assertEquals(0, ((IntValue) values[4]).getValue());

        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    }

    /**
     * Same scenario as reading all five integer fields from two '|'-delimited lines, but the
     * input file is produced by {@code createTempGzipFile}, i.e. written through a
     * {@code GZIPOutputStream} into a file ending in {@code .gz}.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testReadNoPosAllGzip() throws Exception {
        final String fileContent = "111|222|333|444|555\n666|777|888|999|000|";
        final FileInputSplit split = createTempGzipFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class,
                IntValue.class);

        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(5);

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(111, ((IntValue) values[0]).getValue());
        assertEquals(222, ((IntValue) values[1]).getValue());
        assertEquals(333, ((IntValue) values[2]).getValue());
        assertEquals(444, ((IntValue) values[3]).getValue());
        assertEquals(555, ((IntValue) values[4]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(666, ((IntValue) values[0]).getValue());
        assertEquals(777, ((IntValue) values[1]).getValue());
        assertEquals(888, ((IntValue) values[2]).getValue());
        assertEquals(999, ((IntValue) values[3]).getValue());
        // the field text is "000", which parses to zero
        assertEquals(0, ((IntValue) values[4]).getValue());

        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    }

    /**
     * Configures only two {@link IntValue} field types although each input line carries five
     * fields, and checks that only the first two fields of each line are parsed into the
     * two-element target array.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testReadNoPosFirstN() throws Exception {
        final String fileContent = "111|222|333|444|555|\n666|777|888|999|000|";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, IntValue.class);

        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(2);

        // if this would parse all, we would get an index out of bounds exception
        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(111, ((IntValue) values[0]).getValue());
        assertEquals(222, ((IntValue) values[1]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(666, ((IntValue) values[0]).getValue());
        assertEquals(777, ((IntValue) values[1]).getValue());

        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    }

    /**
     * Configures a sparse type array (non-null at positions 0, 3 and 7) and checks that exactly
     * those three columns of each ten-column line end up in the three-element target array.
     *
     * <p>Unexpected exceptions propagate to JUnit, which reports them with their stack trace;
     * this replaces printing to stderr and failing with a generic message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testSparseParse() throws Exception {
        final String fileContent = "111|222|333|444|555|666|777|888|999|000|\n"
                + "000|999|888|777|666|555|444|333|222|111|";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, null, null, IntValue.class, null, null, null,
                IntValue.class);

        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(3);

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(111, ((IntValue) values[0]).getValue());
        assertEquals(444, ((IntValue) values[1]).getValue());
        assertEquals(888, ((IntValue) values[2]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        // the field text is "000", which parses to zero
        assertEquals(0, ((IntValue) values[0]).getValue());
        assertEquals(777, ((IntValue) values[1]).getValue());
        assertEquals(333, ((IntValue) values[2]).getValue());

        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    }

    /**
     * Reads two comma-delimited lines of three fields each as {@link LongValue}s and checks all
     * parsed values.
     *
     * <p>Unexpected exceptions propagate to JUnit, which reports them with their stack trace;
     * this replaces printing to stderr and failing with a generic message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testLongLongLong() throws Exception {
        final String fileContent = "1,2,3\n3,2,1";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter(",");
        format.setFieldTypesGeneric(LongValue.class, LongValue.class, LongValue.class);
        format.configure(parameters);
        format.open(split);

        Value[] values = createLongValues(3);

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(1L, ((LongValue) values[0]).getValue());
        assertEquals(2L, ((LongValue) values[1]).getValue());
        assertEquals(3L, ((LongValue) values[2]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(3L, ((LongValue) values[0]).getValue());
        assertEquals(2L, ((LongValue) values[1]).getValue());
        assertEquals(1L, ((LongValue) values[2]).getValue());

        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    }

    /**
     * Selects columns 0, 3 and 7 through explicit field indices (rather than a sparse type
     * array) and checks that exactly those columns of each ten-column line are parsed.
     *
     * <p>Unexpected exceptions propagate to JUnit, which reports them with their stack trace;
     * this replaces printing to stderr and failing with a generic message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @SuppressWarnings("unchecked")
    @Test
    public void testSparseParseWithIndices() throws Exception {
        final String fileContent = "111|222|333|444|555|666|777|888|999|000|\n000|999|888|777|666|555|444|333|222|111|";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldsGeneric(new int[] { 0, 3, 7 },
                (Class<? extends Value>[]) new Class[] { IntValue.class, IntValue.class, IntValue.class });
        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(3);

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(111, ((IntValue) values[0]).getValue());
        assertEquals(444, ((IntValue) values[1]).getValue());
        assertEquals(888, ((IntValue) values[2]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        // the field text is "000", which parses to zero
        assertEquals(0, ((IntValue) values[0]).getValue());
        assertEquals(777, ((IntValue) values[1]).getValue());
        assertEquals(333, ((IntValue) values[2]).getValue());

        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    }

    /**
     * Selects columns 0, 3 and 7 by index from input that uses the three-character delimiter
     * {@code |-|}. The four input lines mix lines with and without a trailing delimiter, and
     * the last line uses fields of varying width.
     *
     * <p>Unexpected exceptions propagate to JUnit, which reports them with their stack trace;
     * this replaces printing to stderr and failing with a generic message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @SuppressWarnings("unchecked")
    @Test
    public void testSparseParseWithIndicesMultiCharDelimiter() throws Exception {
        final String fileContent = "111|-|222|-|333|-|444|-|555|-|666|-|777|-|888|-|999|-|000|-|\n"
                + "000|-|999|-|888|-|777|-|666|-|555|-|444|-|333|-|222|-|111\n"
                + "555|-|999|-|888|-|111|-|666|-|555|-|444|-|777|-|222|-|111|-|\n"
                + "22222|-|99999|-|8|-|99999999|-|6666666|-|5|-|4444|-|8|-|22222|-|1\n";

        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|-|");
        format.setFieldsGeneric(new int[] { 0, 3, 7 },
                (Class<? extends Value>[]) new Class[] { IntValue.class, IntValue.class, IntValue.class });
        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(3);

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(111, ((IntValue) values[0]).getValue());
        assertEquals(444, ((IntValue) values[1]).getValue());
        assertEquals(888, ((IntValue) values[2]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        // the field text is "000", which parses to zero
        assertEquals(0, ((IntValue) values[0]).getValue());
        assertEquals(777, ((IntValue) values[1]).getValue());
        assertEquals(333, ((IntValue) values[2]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(555, ((IntValue) values[0]).getValue());
        assertEquals(111, ((IntValue) values[1]).getValue());
        assertEquals(777, ((IntValue) values[2]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals(22222, ((IntValue) values[0]).getValue());
        assertEquals(99999999, ((IntValue) values[1]).getValue());
        assertEquals(8, ((IntValue) values[2]).getValue());

        assertNull(format.nextRecord(values));
        assertTrue(format.reachedEnd());
    }

    /**
     * Configures five {@link IntValue} fields but supplies lines with only four, and expects
     * the (non-lenient) format to throw a {@link ParseException} on the first record.
     *
     * <p>Any other exception propagates to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating or opening the input fails unexpectedly
     */
    @Test
    public void testReadTooShortInput() throws Exception {
        final String fileContent = "111|222|333|444\n666|777|888|999";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();
        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class,
                IntValue.class);

        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(5);

        try {
            format.nextRecord(values);
            fail("Should have thrown a parse exception on too short input.");
        } catch (ParseException e) {
            // all is well
        }
    }

    /**
     * With lenient parsing enabled, a line that has fewer fields than configured is expected to
     * yield {@code null} from {@code nextRecord} instead of an exception, while the complete
     * lines before and after it are still returned.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testReadTooShortInputLenient() throws Exception {
        final String fileContent = "666|777|888|999|555\n111|222|333|444\n666|777|888|999|555";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();
        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, IntValue.class, IntValue.class, IntValue.class,
                IntValue.class);
        format.setLenient(true);

        format.configure(parameters);
        format.open(split);

        Value[] values = createIntValues(5);

        assertNotNull(format.nextRecord(values)); // line okay
        assertNull(format.nextRecord(values)); // line too short
        assertNotNull(format.nextRecord(values)); // line okay
    }

    /**
     * Reads a valid first line, then expects a {@link ParseException} for the second line,
     * whose last field ({@code hhg}) is configured as an {@link IntValue} but is not numeric.
     *
     * <p>Any other exception propagates to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testReadInvalidContents() throws Exception {
        final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);

        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new StringValue(), new IntValue(), new StringValue(), new IntValue() };

        assertNotNull(format.nextRecord(values));

        try {
            format.nextRecord(values);
            fail("Input format accepted on invalid input.");
        } catch (ParseException ignored) {
            // expected: the non-numeric field must be rejected
        }
    }

    /**
     * With lenient parsing enabled, a line whose last field cannot be parsed as an integer is
     * expected to yield {@code null} from {@code nextRecord} instead of an exception.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testReadInvalidContentsLenient() throws Exception {
        final String fileContent = "abc|222|def|444\nkkz|777|888|hhg";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, IntValue.class, StringValue.class, IntValue.class);
        format.setLenient(true);

        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new StringValue(), new IntValue(), new StringValue(), new IntValue() };

        assertNotNull(format.nextRecord(values));
        assertNull(format.nextRecord(values));
    }

    /**
     * Lenient parsing combined with a skipped (null-typed) middle field: the two malformed
     * lines are expected to yield {@code null}, while the well-formed lines before and after
     * them are returned.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void testReadInvalidContentsLenientWithSkipping() throws Exception {
        final String fileContent = "abc|dfgsdf|777|444\n" + // good line
                "kkz|777|foobar|hhg\n" + // wrong data type in field
                "kkz|777foobarhhg  \n" + // too short, a skipped field never ends
                "xyx|ignored|42|\n"; // another good line
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, null, IntValue.class);
        format.setLenient(true);

        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new StringValue(), new IntValue() };

        assertNotNull(format.nextRecord(values));
        assertNull(format.nextRecord(values));
        assertNull(format.nextRecord(values));
        assertNotNull(format.nextRecord(values));
    }

    /**
     * Writes one record, whose fields and field delimiter contain non-ASCII characters, to a
     * temp file once per charset (UTF-8, UTF-16BE, UTF-16LE) and checks that a format
     * configured with the same charset parses the three fields back unchanged.
     *
     * <p>The format is closed after every charset iteration, inside a {@code finally} block,
     * so the stream opened for a split is released even when an assertion fails.
     *
     * @throws IOException if writing the temp file or reading it back fails
     */
    @Test
    public void testReadWithCharset() throws IOException {
        // Unicode row fragments
        final String[] records = new String[] { "\u020e\u021f", "Flink", "\u020b\u020f" };

        // Unicode delimiter
        final String delimiter = "\u05c0\u05c0";

        final String fileContent = StringUtils.join(records, delimiter);

        // StringValueParser does not use charset so rely on StringParser.
        // Deliberately not named 'format' so the field of the same name is not shadowed.
        final GenericCsvInputFormat<String[]> charsetFormat = new GenericCsvInputFormat<String[]>() {
            @Override
            public String[] readRecord(String[] target, byte[] bytes, int offset, int numBytes) throws IOException {
                return parseRecord(target, bytes, offset, numBytes) ? target : null;
            }
        };
        charsetFormat.setFilePath("file:///some/file/that/will/not/be/read");

        for (String charset : new String[] { "UTF-8", "UTF-16BE", "UTF-16LE" }) {
            File tempFile = File.createTempFile("test_contents", "tmp");
            tempFile.deleteOnExit();

            // write string with proper encoding
            try (Writer out = new OutputStreamWriter(new FileOutputStream(tempFile), charset)) {
                out.write(fileContent);
            }

            FileInputSplit split = new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0,
                    tempFile.length(), new String[] { "localhost" });

            charsetFormat.setFieldDelimiter(delimiter);
            charsetFormat.setFieldTypesGeneric(String.class, String.class, String.class);
            // use the same encoding to parse the file as used to read the file;
            // the field delimiter is reinterpreted when the charset is set
            charsetFormat.setCharset(charset);
            charsetFormat.configure(new Configuration());
            charsetFormat.open(split);

            try {
                String[] values = new String[] { "", "", "" };
                values = charsetFormat.nextRecord(values);

                // validate results
                assertNotNull(values);
                for (int i = 0; i < records.length; i++) {
                    assertEquals(records[i], values[i]);
                }

                assertNull(charsetFormat.nextRecord(values));
                assertTrue(charsetFormat.reachedEnd());
            } finally {
                // release this split's stream before the next charset re-opens the format
                charsetFormat.close();
            }
        }
    }

    /**
     * Checks that empty fields between delimiters are parsed as empty strings, including a
     * line that consists of delimiters only.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void readWithEmptyField() throws Exception {
        final String fileContent = "abc|def|ghijk\nabc||hhg\n|||";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, StringValue.class, StringValue.class);

        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new StringValue(), new StringValue(), new StringValue() };

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("abc", ((StringValue) values[0]).getValue());
        assertEquals("def", ((StringValue) values[1]).getValue());
        assertEquals("ghijk", ((StringValue) values[2]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("abc", ((StringValue) values[0]).getValue());
        assertEquals("", ((StringValue) values[1]).getValue());
        assertEquals("hhg", ((StringValue) values[2]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("", ((StringValue) values[0]).getValue());
        assertEquals("", ((StringValue) values[1]).getValue());
        assertEquals("", ((StringValue) values[2]).getValue());
    }

    /**
     * Enables quoted-string parsing with {@code "} as the quote character and checks that the
     * surrounding quotes are stripped from each field. The backslash-escaped quote inside the
     * first field is expected to come back verbatim, backslash included.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void readWithParseQuotedStrings() throws Exception {
        final String fileContent = "\"ab\\\"c\"|\"def\"\n\"ghijk\"|\"abc\"";
        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(StringValue.class, StringValue.class);
        format.enableQuotedStringParsing('"');

        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new StringValue(), new StringValue() };

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("ab\\\"c", ((StringValue) values[0]).getValue());
        assertEquals("def", ((StringValue) values[1]).getValue());

        values = format.nextRecord(values);
        assertNotNull(values);
        assertEquals("ghijk", ((StringValue) values[0]).getValue());
        assertEquals("abc", ((StringValue) values[1]).getValue());
    }

    /**
     * Enables skipping of the first line as a header and checks that only the two data lines
     * after it are returned as records before the input is exhausted.
     *
     * <p>Unexpected exceptions propagate to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void readWithHeaderLine() throws Exception {
        final String fileContent = "colname-1|colname-2|some name 3|column four|\n" + "123|abc|456|def|\n"
                + "987|xyz|654|pqr|\n";

        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, StringValue.class, IntValue.class, StringValue.class);
        format.setSkipFirstLineAsHeader(true);

        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new IntValue(), new StringValue(), new IntValue(), new StringValue() };

        // first line is skipped as header
        assertNotNull(format.nextRecord(values)); // first row (= second line)
        assertNotNull(format.nextRecord(values)); // second row (= third line)
        assertNull(format.nextRecord(values)); // exhausted
        assertTrue(format.reachedEnd()); // exhausted
    }

    /**
     * Enables skipping of the first line as a header and checks that a repeated header line in
     * the middle of the data is not skipped: reading it is expected to throw a
     * {@link ParseException}.
     *
     * <p>Any other exception propagates to JUnit so the failure report keeps the full stack
     * trace rather than only the exception message.
     *
     * @throws Exception if creating, opening or reading the input fails unexpectedly
     */
    @Test
    public void readWithHeaderLineAndInvalidIntermediate() throws Exception {
        final String fileContent = "colname-1|colname-2|some name 3|column four|\n" + "123|abc|456|def|\n"
                + "colname-1|colname-2|some name 3|column four|\n" + // repeated header in the middle
                "987|xyz|654|pqr|\n";

        final FileInputSplit split = createTempFile(fileContent);

        final Configuration parameters = new Configuration();

        format.setFieldDelimiter("|");
        format.setFieldTypesGeneric(IntValue.class, StringValue.class, IntValue.class, StringValue.class);
        format.setSkipFirstLineAsHeader(true);

        format.configure(parameters);
        format.open(split);

        Value[] values = new Value[] { new IntValue(), new StringValue(), new IntValue(), new StringValue() };

        // first line is skipped as header
        assertNotNull(format.nextRecord(values)); // first row (= second line)

        try {
            format.nextRecord(values);
            fail("Format accepted invalid line.");
        } catch (ParseException e) {
            // as we expected
        }
    }

    /**
     * Writes {@code content} through a {@link DeflaterOutputStream} into a new temp file whose
     * name ends in {@code tmp.deflate} and returns a split covering the whole file.
     *
     * <p>The streams are managed by try-with-resources so the file handle is released even if
     * writing fails.
     *
     * @param content text to write; {@code DataOutputStream.writeBytes} keeps only the low
     *                byte of each character, so the content should be ASCII
     * @return a split starting at offset 0 with the compressed file's length
     * @throws IOException if the temp file cannot be created or written
     */
    private FileInputSplit createTempDeflateFile(String content) throws IOException {
        File tempFile = File.createTempFile("test_contents", "tmp.deflate");
        tempFile.deleteOnExit();

        // the file stream is its own resource so it is closed even if a wrapper fails to construct
        try (FileOutputStream fileOut = new FileOutputStream(tempFile);
                DataOutputStream dos = new DataOutputStream(new DeflaterOutputStream(fileOut))) {
            dos.writeBytes(content);
        }

        return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(),
                new String[] { "localhost" });
    }

    /**
     * Writes {@code content} through a {@link GZIPOutputStream} into a new temp file whose
     * name ends in {@code tmp.gz} and returns a split covering the whole file.
     *
     * <p>The streams are managed by try-with-resources so the file handle is released even if
     * constructing the GZIP stream or writing fails.
     *
     * @param content text to write; {@code DataOutputStream.writeBytes} keeps only the low
     *                byte of each character, so the content should be ASCII
     * @return a split starting at offset 0 with the compressed file's length
     * @throws IOException if the temp file cannot be created or written
     */
    private FileInputSplit createTempGzipFile(String content) throws IOException {
        File tempFile = File.createTempFile("test_contents", "tmp.gz");
        tempFile.deleteOnExit();

        // the file stream is its own resource so it is closed even if a wrapper fails to construct
        try (FileOutputStream fileOut = new FileOutputStream(tempFile);
                DataOutputStream dos = new DataOutputStream(new GZIPOutputStream(fileOut))) {
            dos.writeBytes(content);
        }

        return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(),
                new String[] { "localhost" });
    }

    /**
     * Builds an array of {@code num} distinct, freshly constructed {@link IntValue} instances
     * to serve as the reusable target record for {@code nextRecord}.
     *
     * @param num number of elements to create
     * @return a new array filled with new {@link IntValue} objects
     */
    private Value[] createIntValues(int num) {
        final Value[] holders = new Value[num];

        int idx = 0;
        while (idx < holders.length) {
            holders[idx] = new IntValue();
            idx++;
        }

        return holders;
    }

    /**
     * Builds an array of {@code num} distinct, freshly constructed {@link LongValue} instances
     * to serve as the reusable target record for {@code nextRecord}.
     *
     * @param num number of elements to create
     * @return a new array filled with new {@link LongValue} objects
     */
    private Value[] createLongValues(int num) {
        final Value[] holders = new Value[num];

        int idx = 0;
        while (idx < holders.length) {
            holders[idx] = new LongValue();
            idx++;
        }

        return holders;
    }

    /**
     * Minimal concrete {@link GenericCsvInputFormat} over {@code Value[]} records, used as the
     * format under test. It delegates all parsing to {@code parseRecord}.
     */
    private static final class TestCsvInputFormat extends GenericCsvInputFormat<Value[]> {

        private static final long serialVersionUID = 2653609265252951059L;

        /**
         * Parses one record into {@code target}.
         *
         * @return {@code target} when {@code parseRecord} reports success, otherwise {@code null}
         */
        @Override
        public Value[] readRecord(Value[] target, byte[] bytes, int offset, int numBytes) {
            final boolean parsed = parseRecord(target, bytes, offset, numBytes);
            if (!parsed) {
                return null;
            }
            return target;
        }
    }

}