org.apache.storm.hdfs.spout.TestHdfsSpout.java Source code

Introduction

Here is the source code for org.apache.storm.hdfs.spout.TestHdfsSpout.java. This JUnit test class exercises HdfsSpout against an in-process MiniDFSCluster: it covers reading text and sequence files with and without acking, resuming files abandoned by another spout instance, lock-file creation, progress logging and expiry, and moving unreadable files to the configured bad-files directory.
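
The tests share a common driver pattern: build a configuration map, construct an HdfsSpout through the makeSpout helper, and step it with runSpout, whose commands are "rN" (call nextTuple() N times), "aN" (ack the N-th emitted tuple) and "fN" (fail it). A minimal sketch of that pattern, as it would appear inside a test method of this class (file name and counts are illustrative; the helpers are defined further down in the listing):

    // Illustrative sketch only, using the class's own helpers.
    Path file = new Path(source.toString() + "/file1.txt");
    createTextFile(file, 5);                                   // write 5 lines into the MiniDFS cluster

    Map conf = getDefaultConfig();                             // source/archive/bad dirs + HDFS URI
    conf.put(Configs.COMMIT_FREQ_COUNT, "1");                  // record progress in the lock file per tuple
    HdfsSpout spout = makeSpout(0, conf, Configs.TEXT, TextFileReader.defaultFields);

    List<String> emitted = runSpout(spout, "r5", "a0", "a1");  // 5 nextTuple() calls, then ack tuples 0 and 1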

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.storm.hdfs.spout;

import org.apache.storm.Config;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.storm.hdfs.common.HdfsUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.storm.hdfs.common.HdfsUtils.Pair;
import sun.reflect.generics.reflectiveObjects.NotImplementedException;

public class TestHdfsSpout {

    @Rule
    public TemporaryFolder tempFolder = new TemporaryFolder();
    public File baseFolder;
    private Path source;
    private Path archive;
    private Path badfiles;

    public TestHdfsSpout() {
    }

    static MiniDFSCluster.Builder builder;
    static MiniDFSCluster hdfsCluster;
    static DistributedFileSystem fs;
    static String hdfsURI;
    static Configuration conf = new Configuration();

    @BeforeClass
    public static void setupClass() throws IOException {
        builder = new MiniDFSCluster.Builder(new Configuration());
        hdfsCluster = builder.build();
        fs = hdfsCluster.getFileSystem();
        hdfsURI = "hdfs://localhost:" + hdfsCluster.getNameNodePort() + "/";
    }

    @AfterClass
    public static void teardownClass() throws IOException {
        fs.close();
        hdfsCluster.shutdown();
    }

    @Before
    public void setup() throws Exception {
        baseFolder = tempFolder.newFolder("hdfsspout");
        source = new Path(baseFolder.toString() + "/source");
        fs.mkdirs(source);
        archive = new Path(baseFolder.toString() + "/archive");
        fs.mkdirs(archive);
        badfiles = new Path(baseFolder.toString() + "/bad");
        fs.mkdirs(badfiles);
    }

    @After
    public void shutDown() throws IOException {
        fs.delete(new Path(baseFolder.toString()), true);
    }

    @Test
    public void testSimpleText_noACK() throws IOException {
        Path file1 = new Path(source.toString() + "/file1.txt");
        createTextFile(file1, 5);

        Path file2 = new Path(source.toString() + "/file2.txt");
        createTextFile(file2, 5);

        Map conf = getDefaultConfig();
        conf.put(Configs.COMMIT_FREQ_COUNT, "1");
        conf.put(Configs.COMMIT_FREQ_SEC, "1");

        HdfsSpout spout = makeSpout(0, conf, Configs.TEXT, TextFileReader.defaultFields);

        runSpout(spout, "r11");

        Path arc1 = new Path(archive.toString() + "/file1.txt");
        Path arc2 = new Path(archive.toString() + "/file2.txt");
        checkCollectorOutput_txt((MockCollector) spout.getCollector(), arc1, arc2);
    }

    @Test
    public void testSimpleText_ACK() throws IOException {
        Path file1 = new Path(source.toString() + "/file1.txt");
        createTextFile(file1, 5);

        Path file2 = new Path(source.toString() + "/file2.txt");
        createTextFile(file2, 5);

        Map conf = getDefaultConfig();
        conf.put(Configs.COMMIT_FREQ_COUNT, "1");
        conf.put(Configs.COMMIT_FREQ_SEC, "1");
        conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, "1"); // enable acking
        HdfsSpout spout = makeSpout(0, conf, Configs.TEXT, TextFileReader.defaultFields);

        // consume file 1
        runSpout(spout, "r6", "a0", "a1", "a2", "a3", "a4");
        Path arc1 = new Path(archive.toString() + "/file1.txt");
        checkCollectorOutput_txt((MockCollector) spout.getCollector(), arc1);

        // consume file 2
        runSpout(spout, "r6", "a5", "a6", "a7", "a8", "a9");
        Path arc2 = new Path(archive.toString() + "/file2.txt");
        checkCollectorOutput_txt((MockCollector) spout.getCollector(), arc1, arc2);
    }

    @Test
    public void testResumeAbandoned_Text_NoAck() throws Exception {
        Path file1 = new Path(source.toString() + "/file1.txt");
        createTextFile(file1, 6);

        final Integer lockExpirySec = 1;
        Map conf = getDefaultConfig();
        conf.put(Configs.COMMIT_FREQ_COUNT, "1");
        conf.put(Configs.COMMIT_FREQ_SEC, "1000"); // basically disable it
        conf.put(Configs.LOCK_TIMEOUT, lockExpirySec.toString());
        HdfsSpout spout = makeSpout(0, conf, Configs.TEXT, TextFileReader.defaultFields);
        HdfsSpout spout2 = makeSpout(1, conf, Configs.TEXT, TextFileReader.defaultFields);

        // consume file 1 partially
        List<String> res = runSpout(spout, "r2");
        Assert.assertEquals(2, res.size());
        // abandon file
        FileLock lock = getField(spout, "lock");
        TestFileLock.closeUnderlyingLockFile(lock);
        Thread.sleep(lockExpirySec * 2 * 1000);

        // check lock file presence
        Assert.assertTrue(fs.exists(lock.getLockFile()));

        // create another spout to take over processing and read a few lines
        List<String> res2 = runSpout(spout2, "r3");
        Assert.assertEquals(3, res2.size());

        // check lock file presence
        Assert.assertTrue(fs.exists(lock.getLockFile()));

        // check lock file contents
        List<String> contents = readTextFile(fs, lock.getLockFile().toString());
        Assert.assertFalse(contents.isEmpty());

        // finish up reading the file
        res2 = runSpout(spout2, "r2");
        Assert.assertEquals(4, res2.size());

        // check lock file is gone
        Assert.assertFalse(fs.exists(lock.getLockFile()));
        FileReader rdr = getField(spout2, "reader");
        Assert.assertNull(rdr);
        Assert.assertTrue(getBoolField(spout2, "fileReadCompletely"));

    }

    @Test
    public void testResumeAbandoned_Seq_NoAck() throws Exception {
        Path file1 = new Path(source.toString() + "/file1.seq");
        createSeqFile(fs, file1, 6);

        final Integer lockExpirySec = 1;
        Map conf = getDefaultConfig();
        conf.put(Configs.COMMIT_FREQ_COUNT, "1");
        conf.put(Configs.COMMIT_FREQ_SEC, "1000"); // basically disable it
        conf.put(Configs.LOCK_TIMEOUT, lockExpirySec.toString());
        HdfsSpout spout = makeSpout(0, conf, Configs.SEQ, SequenceFileReader.defaultFields);
        HdfsSpout spout2 = makeSpout(1, conf, Configs.SEQ, SequenceFileReader.defaultFields);

        // consume file 1 partially
        List<String> res = runSpout(spout, "r2");
        Assert.assertEquals(2, res.size());
        // abandon file
        FileLock lock = getField(spout, "lock");
        TestFileLock.closeUnderlyingLockFile(lock);
        Thread.sleep(lockExpirySec * 2 * 1000);

        // check lock file presence
        Assert.assertTrue(fs.exists(lock.getLockFile()));

        // create another spout to take over processing and read a few lines
        List<String> res2 = runSpout(spout2, "r3");
        Assert.assertEquals(3, res2.size());

        // check lock file presence
        Assert.assertTrue(fs.exists(lock.getLockFile()));

        // check lock file contents
        List<String> contents = getTextFileContents(fs, lock.getLockFile());
        Assert.assertFalse(contents.isEmpty());

        // finish up reading the file
        res2 = runSpout(spout2, "r3");
        Assert.assertEquals(4, res2.size());

        // check lock file is gone
        Assert.assertFalse(fs.exists(lock.getLockFile()));
        FileReader rdr = getField(spout2, "reader");
        Assert.assertNull(rdr);
        Assert.assertTrue(getBoolField(spout2, "fileReadCompletely"));
    }

    private void checkCollectorOutput_txt(MockCollector collector, Path... txtFiles) throws IOException {
        ArrayList<String> expected = new ArrayList<>();
        for (Path txtFile : txtFiles) {
            List<String> lines = getTextFileContents(fs, txtFile);
            expected.addAll(lines);
        }

        List<String> actual = new ArrayList<>();
        for (Pair<HdfsSpout.MessageId, List<Object>> item : collector.items) {
            actual.add(item.getValue().get(0).toString());
        }
        Assert.assertEquals(expected, actual);
    }

    private List<String> getTextFileContents(FileSystem fs, Path txtFile) throws IOException {
        ArrayList<String> result = new ArrayList<>();
        FSDataInputStream istream = fs.open(txtFile);
        InputStreamReader isreader = new InputStreamReader(istream, "UTF-8");
        BufferedReader reader = new BufferedReader(isreader);

        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            result.add(line);
        }
        isreader.close();
        return result;
    }

    private void checkCollectorOutput_seq(MockCollector collector, Path... seqFiles) throws IOException {
        ArrayList<String> expected = new ArrayList<>();
        for (Path seqFile : seqFiles) {
            List<String> lines = getSeqFileContents(fs, seqFile);
            expected.addAll(lines);
        }
        Assert.assertEquals(expected, collector.lines);
    }

    private List<String> getSeqFileContents(FileSystem fs, Path... seqFiles) throws IOException {
        ArrayList<String> result = new ArrayList<>();

        for (Path seqFile : seqFiles) {
            Path file = new Path(fs.getUri().toString() + seqFile.toString());
            SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
            try {
                Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                while (reader.next(key, value)) {
                    String keyValStr = Arrays.asList(key, value).toString();
                    result.add(keyValStr);
                }
            } finally {
                reader.close();
            }
        } // for
        return result;
    }

    private List<String> listDir(Path p) throws IOException {
        ArrayList<String> result = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> fileNames = fs.listFiles(p, false);
        while (fileNames.hasNext()) {
            LocatedFileStatus fileStatus = fileNames.next();
            result.add(Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toString());
        }
        return result;
    }

    @Test
    public void testMultipleFileConsumption_Ack() throws Exception {
        Path file1 = new Path(source.toString() + "/file1.txt");
        createTextFile(file1, 5);

        Map conf = getDefaultConfig();
        conf.put(Configs.COMMIT_FREQ_COUNT, "1");
        conf.put(Configs.COMMIT_FREQ_SEC, "1");
        conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, "1"); // enable ACKing
        HdfsSpout spout = makeSpout(0, conf, Configs.TEXT, TextFileReader.defaultFields);

        // read a few lines from file1, don't ack
        runSpout(spout, "r3");
        FileReader reader = getField(spout, "reader");
        Assert.assertNotNull(reader);
        Assert.assertEquals(false, getBoolField(spout, "fileReadCompletely"));

        // read remaining lines
        runSpout(spout, "r3");
        reader = getField(spout, "reader");
        Assert.assertNotNull(reader);
        Assert.assertEquals(true, getBoolField(spout, "fileReadCompletely"));

        // ack a few
        runSpout(spout, "a0", "a1", "a2");
        reader = getField(spout, "reader");
        Assert.assertNotNull(reader);
        Assert.assertEquals(true, getBoolField(spout, "fileReadCompletely"));

        // ack the rest
        runSpout(spout, "a3", "a4");
        reader = getField(spout, "reader");
        Assert.assertNull(reader);
        Assert.assertEquals(true, getBoolField(spout, "fileReadCompletely"));

        // go to next file
        Path file2 = new Path(source.toString() + "/file2.txt");
        createTextFile(file2, 5);

        // Read 1 line
        runSpout(spout, "r1");
        Assert.assertNotNull(getField(spout, "reader"));
        Assert.assertEquals(false, getBoolField(spout, "fileReadCompletely"));

        // ack 1 tuple
        runSpout(spout, "a5");
        Assert.assertNotNull(getField(spout, "reader"));
        Assert.assertEquals(false, getBoolField(spout, "fileReadCompletely"));

        // read and ack remaining lines
        runSpout(spout, "r5", "a6", "a7", "a8", "a9");
        Assert.assertNull(getField(spout, "reader"));
        Assert.assertEquals(true, getBoolField(spout, "fileReadCompletely"));
    }

    private static <T> T getField(HdfsSpout spout, String fieldName)
            throws NoSuchFieldException, IllegalAccessException {
        Field readerFld = HdfsSpout.class.getDeclaredField(fieldName);
        readerFld.setAccessible(true);
        return (T) readerFld.get(spout);
    }

    private static boolean getBoolField(HdfsSpout spout, String fieldName)
            throws NoSuchFieldException, IllegalAccessException {
        Field readerFld = HdfsSpout.class.getDeclaredField(fieldName);
        readerFld.setAccessible(true);
        return readerFld.getBoolean(spout);
    }

    @Test
    public void testSimpleSequenceFile() throws IOException {
        // 1) create a couple of files to consume
        source = new Path("/tmp/hdfsspout/source");
        fs.mkdirs(source);
        archive = new Path("/tmp/hdfsspout/archive");
        fs.mkdirs(archive);

        Path file1 = new Path(source + "/file1.seq");
        createSeqFile(fs, file1, 5);

        Path file2 = new Path(source + "/file2.seq");
        createSeqFile(fs, file2, 5);

        Map conf = getDefaultConfig();
        HdfsSpout spout = makeSpout(0, conf, Configs.SEQ, SequenceFileReader.defaultFields);

        // consume both files
        List<String> res = runSpout(spout, "r11");
        Assert.assertEquals(10, res.size());

        Assert.assertEquals(2, listDir(archive).size());

        Path f1 = new Path(archive + "/file1.seq");
        Path f2 = new Path(archive + "/file2.seq");

        checkCollectorOutput_seq((MockCollector) spout.getCollector(), f1, f2);
    }

    @Test
    public void testReadFailures() throws Exception {
        // 1) create a couple of input files to read
        Path file1 = new Path(source.toString() + "/file1.txt");
        Path file2 = new Path(source.toString() + "/file2.txt");

        createTextFile(file1, 6);
        createTextFile(file2, 7);
        Assert.assertEquals(2, listDir(source).size());

        // 2) run spout
        Map conf = getDefaultConfig();
        HdfsSpout spout = makeSpout(0, conf, MockTextFailingReader.class.getName(),
                MockTextFailingReader.defaultFields);
        List<String> res = runSpout(spout, "r11");
        String[] expected = new String[] { "[line 0]", "[line 1]", "[line 2]", "[line 0]", "[line 1]", "[line 2]" };
        Assert.assertArrayEquals(expected, res.toArray());

        // 3) make sure 6 lines (3 from each file) were read in all
        Assert.assertEquals(6, ((MockCollector) spout.getCollector()).lines.size());
        ArrayList<Path> badFiles = HdfsUtils.listFilesByModificationTime(fs, badfiles, 0);
        Assert.assertEquals(2, badFiles.size());
    }

    // check lock creation/deletion and contents
    @Test
    public void testLocking() throws Exception {
        Path file1 = new Path(source.toString() + "/file1.txt");
        createTextFile(file1, 10);

        // 0) configure spout to log progress in the lock file for each tuple
        Map conf = getDefaultConfig();
        conf.put(Configs.COMMIT_FREQ_COUNT, "1");
        conf.put(Configs.COMMIT_FREQ_SEC, "100"); // make it irrelvant
        HdfsSpout spout = makeSpout(0, conf, Configs.TEXT, TextFileReader.defaultFields);

        // 1) read initial lines in file, then check if lock exists
        List<String> res = runSpout(spout, "r5");
        Assert.assertEquals(5, res.size());
        List<String> lockFiles = listDir(spout.getLockDirPath());
        Assert.assertEquals(1, lockFiles.size());

        // 2) check log file content line count == tuples emitted + 1
        List<String> lines = readTextFile(fs, lockFiles.get(0));
        Assert.assertEquals(res.size() + 1, lines.size());

        // 3) read remaining lines in file, then ensure lock is gone
        runSpout(spout, "r6");
        lockFiles = listDir(spout.getLockDirPath());
        Assert.assertEquals(0, lockFiles.size());

        // 4)  --- Create another input file and reverify same behavior ---
        Path file2 = new Path(source.toString() + "/file2.txt");
        createTextFile(file2, 10);

        // 5) read initial lines in file, then check if lock exists
        res = runSpout(spout, "r5");
        Assert.assertEquals(15, res.size());
        lockFiles = listDir(spout.getLockDirPath());
        Assert.assertEquals(1, lockFiles.size());

        // 6) check log file content line count == tuples emitted + 1
        lines = readTextFile(fs, lockFiles.get(0));
        Assert.assertEquals(6, lines.size());

        // 7) read remaining lines in file, then ensure lock is gone
        runSpout(spout, "r6");
        lockFiles = listDir(spout.getLockDirPath());
        Assert.assertEquals(0, lockFiles.size());
    }

    @Test
    public void testLockLoggingFreqCount() throws Exception {
        Path file1 = new Path(source.toString() + "/file1.txt");
        createTextFile(file1, 10);

        // 0) configure spout to log progress in the lock file once every 2 tuples
        Map conf = getDefaultConfig();
        conf.put(Configs.COMMIT_FREQ_COUNT, "2"); // 1 lock log entry every 2 tuples
        conf.put(Configs.COMMIT_FREQ_SEC, "1000"); // make it irrelevant for this test
        HdfsSpout spout = makeSpout(0, conf, Configs.TEXT, TextFileReader.defaultFields);

        // 1) read 5 lines in file,
        runSpout(spout, "r5");

        // 2) check log file contents
        String lockFile = listDir(spout.getLockDirPath()).get(0);
        List<String> lines = readTextFile(fs, lockFile);
        Assert.assertEquals(3, lines.size());

        // 3) read 6th line and see if another log entry was made
        runSpout(spout, "r1");
        lines = readTextFile(fs, lockFile);
        Assert.assertEquals(4, lines.size());
    }

    @Test
    public void testLockLoggingFreqSec() throws Exception {
        Path file1 = new Path(source.toString() + "/file1.txt");
        createTextFile(file1, 10);

        // 0) configure spout to log progress in the lock file every 2 seconds
        Map conf = getDefaultConfig();
        conf.put(Configs.COMMIT_FREQ_COUNT, "0"); // disable it
        conf.put(Configs.COMMIT_FREQ_SEC, "2"); // log every 2 sec

        HdfsSpout spout = makeSpout(0, conf, Configs.TEXT, TextFileReader.defaultFields);

        // 1) read 5 lines in file
        runSpout(spout, "r5");

        // 2) check log file contents
        String lockFile = listDir(spout.getLockDirPath()).get(0);
        List<String> lines = readTextFile(fs, lockFile);
        Assert.assertEquals(1, lines.size());
        Thread.sleep(3000); // allow freq_sec to expire

        // 3) read another line and see if another log entry was made
        runSpout(spout, "r1");
        lines = readTextFile(fs, lockFile);
        Assert.assertEquals(2, lines.size());
    }

    private static List<String> readTextFile(FileSystem fs, String f) throws IOException {
        Path file = new Path(f);
        ArrayList<String> result = new ArrayList<>();
        // use try-with-resources so the underlying HDFS stream is always closed
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(file)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                result.add(line);
            }
        }
        return result;
    }

    private Map getDefaultConfig() {
        Map conf = new HashMap();
        conf.put(Configs.SOURCE_DIR, source.toString());
        conf.put(Configs.ARCHIVE_DIR, archive.toString());
        conf.put(Configs.BAD_DIR, badfiles.toString());
        conf.put(Configs.HDFS_URI, hdfsCluster.getURI().toString());
        conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, "0");
        return conf;
    }

    private static HdfsSpout makeSpout(int spoutId, Map conf, String readerType, String[] outputFields) {
        HdfsSpout spout = new HdfsSpout().withOutputFields(outputFields);
        MockCollector collector = new MockCollector();
        conf.put(Configs.READER_TYPE, readerType);
        spout.open(conf, new MockTopologyContext(spoutId), collector);
        conf.put(Configs.HDFS_URI, hdfsCluster.getURI().toString());
        return spout;
    }

    /**
     * Execute a sequence of calls on HdfsSpout.
     *
     * @param cmds the commands to run. Supported commands:
     *             rN - call nextTuple() N times (N defaults to 1 if omitted)
     *             aN - ack the tuple emitted at position N
     *             fN - fail the tuple emitted at position N
     */
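    // For example, runSpout(spout, "r3", "a0", "f1") calls nextTuple() three times, then acks
    // the tuple at index 0 and fails the tuple at index 1 of the collector's items list.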

    private List<String> runSpout(HdfsSpout spout, String... cmds) {
        MockCollector collector = (MockCollector) spout.getCollector();
        for (String cmd : cmds) {
            if (cmd.startsWith("r")) {
                int count = 1;
                if (cmd.length() > 1) {
                    count = Integer.parseInt(cmd.substring(1));
                }
                for (int i = 0; i < count; ++i) {
                    spout.nextTuple();
                }
            } else if (cmd.startsWith("a")) {
                int n = Integer.parseInt(cmd.substring(1));
                Pair<HdfsSpout.MessageId, List<Object>> item = collector.items.get(n);
                spout.ack(item.getKey());
            } else if (cmd.startsWith("f")) {
                int n = Integer.parseInt(cmd.substring(1));
                Pair<HdfsSpout.MessageId, List<Object>> item = collector.items.get(n);
                spout.fail(item.getKey());
            }
        }
        return collector.lines;
    }

    private void createTextFile(Path file, int lineCount) throws IOException {
        FSDataOutputStream os = fs.create(file);
        for (int i = 0; i < lineCount; i++) {
            os.writeBytes("line " + i + System.lineSeparator());
        }
        os.close();
    }

    private static void createSeqFile(FileSystem fs, Path file, int rowCount) throws IOException {

        Configuration conf = new Configuration();
        try {
            if (fs.exists(file)) {
                fs.delete(file, false);
            }

            SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
            for (int i = 0; i < rowCount; i++) {
                w.append(new IntWritable(i), new Text("line " + i));
            }
            w.close();
            System.out.println("done");
        } catch (IOException e) {
            e.printStackTrace();

        }
    }

    static class MockCollector extends SpoutOutputCollector {
        // one entry per emitted tuple (the tuple's toString())
        public ArrayList<String> lines;
        public ArrayList<Pair<HdfsSpout.MessageId, List<Object>>> items;

        public MockCollector() {
            super(null);
            lines = new ArrayList<>();
            items = new ArrayList<>();
        }

        @Override
        public List<Integer> emit(String streamId, List<Object> tuple, Object messageId) {
            lines.add(tuple.toString());
            items.add(HdfsUtils.Pair.of(messageId, tuple));
            return null;
        }

        @Override
        public void emitDirect(int arg0, String arg1, List<Object> arg2, Object arg3) {
            throw new NotImplementedException();
        }

        @Override
        public void reportError(Throwable arg0) {
            throw new NotImplementedException();
        }

        @Override
        public long getPendingCount() {
            return 0;
        }
    } // class MockCollector

    // Throws IOException on the 3rd & 4th calls to next(), succeeds on the 5th, and throws
    // ParseException thereafter. Effectively produces the first 3 lines (line 0, 1 & 2) of each file read.
    static class MockTextFailingReader extends TextFileReader {
        public static final String[] defaultFields = { "line" };
        int readAttempts = 0;

        public MockTextFailingReader(FileSystem fs, Path file, Map conf) throws IOException {
            super(fs, file, conf);
        }

        @Override
        public List<Object> next() throws IOException, ParseException {
            readAttempts++;
            if (readAttempts == 3 || readAttempts == 4) {
                throw new IOException("mock test exception");
            } else if (readAttempts > 5) {
                throw new ParseException("mock test exception", null);
            }
            return super.next();
        }
    }

    static class MockTopologyContext extends TopologyContext {
        private final int componentId;

        public MockTopologyContext(int componentId) {
            // TopologyContext constructor arguments being passed as null below:
            // StormTopology topology, Map stormConf, Map<Integer, String> taskToComponent,
            // Map<String, List<Integer>> componentToSortedTasks,
            // Map<String, Map<String, Fields>> componentToStreamToFields,
            // String stormId, String codeDir, String pidDir, Integer taskId, Integer workerPort,
            // List<Integer> workerTasks, Map<String, Object> defaultResources,
            // Map<String, Object> userResources, Map<String, Object> executorData,
            // Map<Integer, Map<Integer, Map<String, IMetric>>> registeredMetrics,
            // Atom openOrPrepareWasCalled
            super(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);
            this.componentId = componentId;
        }

        public String getThisComponentId() {
            return Integer.toString(componentId);
        }

    }

}