biz.c24.io.spring.batch.reader.C24ItemReader.java Source code

Java tutorial

Introduction

Here is the source code for biz.c24.io.spring.batch.reader.C24ItemReader.java

Source

/*
 * Copyright 2012 C24 Technologies.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package biz.c24.io.spring.batch.reader;

import biz.c24.io.api.data.ComplexDataObject;
import biz.c24.io.api.data.Element;
import biz.c24.io.api.data.ValidationEvent;
import biz.c24.io.api.data.ValidationException;
import biz.c24.io.api.data.ValidationListener;
import biz.c24.io.api.data.ValidationManager;
import biz.c24.io.api.presentation.Source;
import biz.c24.io.api.presentation.TextualSource;
import biz.c24.io.spring.batch.C24CompoundValidationException;
import biz.c24.io.spring.batch.reader.source.SplittingReaderSource;
import biz.c24.io.spring.batch.reader.source.SplittingReader;
import biz.c24.io.spring.core.C24Model;
import biz.c24.io.spring.source.SourceFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.annotation.AfterStep;
import org.springframework.batch.core.annotation.BeforeStep;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.NonTransientResourceException;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import org.springframework.util.Assert;

import javax.annotation.PostConstruct;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
import java.util.LinkedList;
import java.util.regex.Pattern;

/**
 * ItemReader that reads ComplexDataObjects from a SplittingReaderSource.
 * Optionally supports the ability to split the incoming data stream into entities by use of a
 * regular expression to detect the start of a new entity; this allows the more expensive parsing 
 * to be performed in parallel.
 * 
 * The optional splitting process currently assumes that each line:
 * a) Is terminated with a platform specific CRLF (or equivalent)
 * b) Belongs to at most one entity
 * 
 * In all cases the optional validation takes place in parallel if multiple threads are used.
 * 
 * @author Andrew Elmore
 */
public class C24ItemReader<Result> implements ItemReader<Result> {

    /**
     * Class-wide logger.
     */
    private static final Logger LOG = LoggerFactory.getLogger(C24ItemReader.class);

    /**
     * SourceFactory to use to generate our iO Sources.
     * When null, the default source of the element type's model is used instead.
     */
    private SourceFactory ioSourceFactory = null;

    /**
     * Parser shared between all threads. Used only where we do not have an elementStartPattern
     * and the source advises that multiple threads may share a reader. Created lazily and reset to
     * null once it has been exhausted.
     */
    private volatile Parser parser = null;

    /**
     * Per-thread cache of parsers, used in the modes where each thread owns its own parser.
     */
    private final ThreadLocal<Parser> threadedParser = new ThreadLocal<Parser>();

    /**
     * The type of CDO that we will parse from the source
     */
    private Element elementType;

    /**
     * An optional pattern to use to quickly split the readerSource so we can perform more heavyweight
     * parsing in parallel. Null if splitting is not in use.
     */
    private Pattern elementStartPattern = null;

    /**
     * An optional pattern to use to identify the end of a message. If specified, the message must end with an
     * EOF or this pattern. Additional matches of the startPattern before presence of the stop pattern will
     * not trigger the start of a new message
     */
    private Pattern elementStopPattern = null;

    /**
     * The source from which we'll read the data
     */
    private SplittingReaderSource source;

    /**
     * Per-thread ValidationManagers. Doubles as the validation switch: null means that parsed CDOs
     * are not validated.
     */
    private ThreadLocal<ValidationManager> validator = null;

    /**
     * If we're validating, do we failfast or collect all failures?
     */
    private boolean failfast = true;

    /**
     * Optional callback that clients can register to intercept lines and elements as we read them.
     * Null if none is registered.
     */
    private ParseListener<Object, Result> parseListener = null;

    /**
     * Creates an unconfigured reader. The element type (or model) and the source are supplied
     * afterwards through the setters; {@link #validateConfiguration()} checks that they were.
     */
    public C24ItemReader() {
    }

    /**
     * Checks that the mandatory properties have been supplied.
     * An element type and a source are always required. An elementStopPattern is only accepted
     * together with an elementStartPattern.
     */
    @PostConstruct
    public void validateConfiguration() {
        Assert.notNull(elementType, "Element type must be set, either explicitly or by setting the model");
        Assert.notNull(source, "Source must be set");

        if (elementStopPattern == null) {
            // No stop pattern, so there is nothing further to cross-check
            return;
        }
        Assert.notNull(elementStartPattern,
                "elementStopPattern can only be used if an elementStartPattern is also set");
    }

    /**
     * Returns the ParseListener registered with this reader.
     *
     * @return The registered ParseListener, or null when none has been registered.
     */
    public ParseListener<Object, Result> getParseListener() {
        return this.parseListener;
    }

    /**
     * Registers the ParseListener that will receive callbacks as data is read, replacing any
     * listener registered previously.
     *
     * @param parseListener The listener to call back, or null to remove the current one
     */
    public void setParseListener(ParseListener<Object, Result> parseListener) {
        this.parseListener = parseListener;
    }

    /**
     * Returns the type of element this reader attempts to parse from the source.
     *
     * @return The configured element type
     */
    public Element getElementType() {
        return this.elementType;
    }

    /**
     * Sets the type of element this reader attempts to parse from the source.
     *
     * @param elementType The element type to parse
     */
    public void setElementType(Element elementType) {
        this.elementType = elementType;
    }

    /**
     * Convenience alternative to {@link #setElementType(Element)}: uses the root element of the
     * supplied model as the element type.
     *
     * @param model The model whose root element we wish to parse
     */
    public void setModel(C24Model model) {
        this.elementType = model.getRootElement();
    }

    /**
     * Returns the regular expression used to split up the incoming data.
     *
     * @return The start-of-element regular expression, or null if none has been set
     */
    public String getElementStartPattern() {
        if (elementStartPattern == null) {
            return null;
        }
        return elementStartPattern.pattern();
    }

    /**
     * Sets the regular expression used to quickly split up the source into individual entities for parsing.
     * The expression is compiled with {@link Pattern#DOTALL}.
     *
     * @param elementStartRegEx The regular expression to identify the start of a new entity in the source,
     *                          or null to clear the pattern and disable splitting
     */
    public void setElementStartPattern(String elementStartRegEx) {
        // Mirror the getter, which reports null for "not set", rather than failing inside Pattern.compile
        this.elementStartPattern = elementStartRegEx == null
                ? null
                : Pattern.compile(elementStartRegEx, Pattern.DOTALL);
    }

    /**
     * Returns the regular expression used to determine the end of a message.
     *
     * @return The end-of-element regular expression, or null if none has been set
     */
    public String getElementStopPattern() {
        if (elementStopPattern == null) {
            return null;
        }
        return elementStopPattern.pattern();
    }

    /**
     * In conjunction with the element start regex, used to detect the end of a message. Note that it is possible for a single
     * line to match both the start and stop patterns and hence be a complete element on its own.
     * The expression is compiled with {@link Pattern#DOTALL}.
     *
     * @param elementStopRegEx The regular expression to identify the end of an entity in the source,
     *                         or null to clear the pattern
     */
    public void setElementStopPattern(String elementStopRegEx) {
        // Mirror the getter, which reports null for "not set", rather than failing inside Pattern.compile
        this.elementStopPattern = elementStopRegEx == null
                ? null
                : Pattern.compile(elementStopRegEx, Pattern.DOTALL);
    }

    /**
     * Switches validation of parsed CDOs on or off.
     * When on, an exception is thrown for any entity which fails validation. Switching it on
     * (even if it was already on) installs a fresh, empty per-thread ValidationManager holder.
     *
     * @param validate Whether or not to validate parsed CDOs
     */
    public void setValidate(boolean validate) {
        if (validate) {
            this.validator = new ThreadLocal<ValidationManager>();
        } else {
            this.validator = null;
        }
    }

    /**
     * Reports whether this ItemReader validates the CDOs it parses.
     *
     * @return True iff read CDOs are automatically validated
     */
    public boolean isValidating() {
        return this.validator != null;
    }

    /**
     * Reports whether validation stops at the first failure.
     *
     * @return True iff the validator will abort on first failure
     */
    public boolean isFailfast() {
        return this.failfast;
    }

    /**
     * Chooses between failing on the first validation failure and collecting all of them.
     * If false, all failures are gathered first; several failures are then reported together in
     * one compound exception, while a lone failure is reported as though failing fast.
     *
     * @param failfast Whether or not to fail fast
     */
    public void setFailfast(boolean failfast) {
        this.failfast = failfast;
    }

    /**
     * Returns the SplittingReaderSource that CDOs are parsed from.
     *
     * @return This reader's SplittingReaderSource
     */
    public SplittingReaderSource getSource() {
        return this.source;
    }

    /**
     * Supplies the source that this reader obtains its data from.
     *
     * @param source The SplittingReaderSource to read data from
     */
    public void setSource(SplittingReaderSource source) {
        this.source = source;
    }

    /**
     * Supplies the factory used to create iO sources.
     *
     * @param ioSourceFactory The factory to use; if null, the model's default source is used
     */
    public void setSourceFactory(SourceFactory ioSourceFactory) {
        this.ioSourceFactory = ioSourceFactory;
    }

    /**
     * Returns the factory used to create iO sources.
     *
     * @return The configured SourceFactory, or null if none has been set
     */
    public SourceFactory getSourceFactory() {
        return ioSourceFactory;
    }

    /**
     * Step-start hook: hands the step execution to the source so that it can initialise itself.
     *
     * @param stepExecution The step execution context
     */
    @BeforeStep
    public void setup(StepExecution stepExecution) {
        this.source.initialise(stepExecution);
    }

    /**
     * Step-end hook: releases the resources we are holding.
     * If validation is on, the per-thread ValidationManager holder is replaced with an empty one;
     * the source is then closed.
     */
    @AfterStep
    public void cleanup() {
        final boolean validating = validator != null;
        if (validating) {
            // Drop the ValidationManagers cached for the step that has just finished
            validator = new ThreadLocal<ValidationManager>();
        }

        source.close();
    }

    /**
     * Pairs the raw text of a to-be-parsed element with the context an external party supplied for it.
     * The ParseListener callback lets an external object associate context with an element; keeping the
     * two together in one object while the element is processed is necessary to avoid race conditions.
     *
     * @author Andrew Elmore
     */
    protected static class ElementContext {

        /** Externally supplied context for the element; may be null. */
        public Object context;

        /** The unparsed text of the element. */
        public String element;

        /**
         * @param element The unparsed text of the element
         * @param context The context associated with the element, if any
         */
        public ElementContext(String element, Object context) {
            this.context = context;
            this.element = element;
        }
    }

    /**
     * Reads the next line of text from the SplittingReader.
     * This implementation simply delegates to {@code SplittingReader.readLine()}, so what constitutes
     * a line is whatever that method defines; subclasses may override this to use a different definition.
     *
     * @param reader The SplittingReader to consume characters from
     * @return A line of text, as returned by the reader
     * @throws IOException If the reader fails to supply a line
     */
    protected String readLine(SplittingReader reader) throws IOException {
        final String nextLine = reader.readLine();
        return nextLine;
    }

    /**
     * Extracts the textual data for an element from the SplittingReader using the elementStartPattern to split
     * up the data.
     *
     * If a ParseListener is registered, it will receive a callback when a line is read from the reader and when
     * an element has been extracted. Both callbacks are made while holding the lock on the reader, so that the
     * context returned for an element cannot be affected by lines that another thread reads afterwards.
     *
     * @param reader The SplittingReader to extract the element from
     * @return The extracted text paired with the listener's context for it (null context if there is no listener).
     *         The text is empty if no element could be read.
     * @throws NonTransientResourceException If reading from the reader fails
     */
    protected ElementContext readElement(SplittingReader reader) {

        // Confined to the calling thread, so the unsynchronised StringBuilder is sufficient
        StringBuilder elementCache = new StringBuilder();
        boolean inElement = false;

        synchronized (reader) {
            try {
                while (reader.ready()) {
                    String line = readLine(reader);
                    if (line == null) {
                        continue;
                    }

                    if (parseListener != null) {
                        // Invoke callback
                        line = parseListener.processLine(line);
                    }

                    // We look for the start of a new element if either:
                    // a) We're not in an element or
                    // b) We don't have an elementStopPattern set (if we do and we're in a element, the presence of a line
                    // that matches the element start pattern is deemed to still be part of the same element)
                    if ((!inElement || elementStopPattern == null)
                            && elementStartPattern.matcher(line).matches()) {
                        // We've encountered the start of a new element
                        String message = elementCache.toString();
                        if (message.trim().length() > 0) {
                            // We were already parsing an element; thus we've finished extracting our element
                            // Cache the line so the next call sees it again
                            // NOTE(review): the pushed-back line has already been through processLine and
                            // will presumably be passed to it again when re-read - confirm that is intended
                            reader.pushback(line);
                            // ...and return what we have already extracted
                            return new ElementContext(message,
                                    parseListener == null ? null : parseListener.getContext(message));
                        }
                        // This is the start of our element. It is added to our elementCache below.
                        inElement = true;
                    }

                    if (inElement) {
                        // More data for our current element
                        elementCache.append(line);

                        // If we have an elementStopPattern, see if the line matched
                        if (elementStopPattern != null && elementStopPattern.matcher(line).matches()) {
                            // We've encountered the end of the element
                            break;
                        }
                    }
                }
            } catch (IOException ioEx) {
                throw new NonTransientResourceException("Failed to extract entity", ioEx);
            }

            // Reached the end of the data or the stop pattern. Capture the context before releasing the
            // lock, exactly as the start-pattern exit above does, so that no other thread's processLine
            // callbacks can slip in between reading the element and asking for its context.
            String message = elementCache.toString();
            return new ElementContext(message,
                    parseListener == null ? null : parseListener.getContext(message));
        }
    }

    /**
     * Called once a thread determines it has exhausted the current parser (more accurately, the underlying Reader).
     * Clears the cached parser so that an appropriate new Parser is created next time getParser is called.
     * A failure to discard the underlying reader is logged and otherwise ignored.
     *
     * @param parser The parser that has been exhausted.
     */
    private void discardParser(Parser parser) {
        // If there's no splitting pattern, we have to ensure that we discard the underlying reader too
        if (elementStartPattern == null) {
            try {
                source.discard(parser.getSplitter());
            } catch (IOException ioEx) {
                // We'll carry on; worst case scenario a failure will be logged multiple times.
                // The exception is passed as the final argument so that its stack trace is not lost.
                LOG.warn("Failed to close reader on source {}", source.getName(), ioEx);
            }
        }

        if (this.elementStartPattern == null && source.useMultipleThreadsPerReader()) {
            // Shared-parser mode: only clear the field if it still refers to the exhausted parser,
            // as another thread may already have replaced it
            synchronized (this) {
                if (this.parser == parser) {
                    this.parser = null;
                }
            }
        } else {
            // Per-thread mode: drop this thread's cached parser
            threadedParser.remove();
        }
    }

    /**
     * Obtains the iO Source to use to read a message.
     * With no ioSourceFactory configured, the default source of the element type's model is used and
     * the reader (if any) is set on it. Otherwise the factory creates the source, receiving an empty
     * StringReader in place of a null reader. Textual sources are told not to require end-of-data.
     *
     * @param reader An optional Reader for the source to read from; may be null
     *
     * @return A configured iO source
     */
    private Source getIoSource(Reader reader) {
        final Source ioSource;

        if (ioSourceFactory != null) {
            // The factory cannot be handed a null reader, so substitute a dummy one
            Reader factoryReader = reader;
            if (factoryReader == null) {
                factoryReader = new StringReader("");
            }
            ioSource = ioSourceFactory.getSource(factoryReader);
        } else {
            // Fall back to the model's default source
            ioSource = elementType.getModel().source();
            if (reader != null) {
                ioSource.setReader(reader);
            }
        }

        if (ioSource instanceof TextualSource) {
            TextualSource textualSource = (TextualSource) ioSource;
            textualSource.setEndOfDataRequired(false);
        }

        return ioSource;
    }

    /**
     * Gets a configured parser for this thread to use to parse messages.
     * Depending on configuration, threads may or may not share the source.
     *
     * @return The parser this thread should use to parse messages, or null if the source could not
     *         supply a reader to build one from.
     */
    private Parser getParser() {

        Parser returnParser = null;

        // We operate in one of 3 modes
        // 1. We have no splitter pattern and the ReaderSource advises us to share the Reader between threads
        // In this case all threads must share the same parser; make sure that we return a synchronized parser
        if (this.elementStartPattern == null && source.useMultipleThreadsPerReader()) {
            returnParser = parser;
            if (returnParser == null) {
                synchronized (this) {
                    // Re-read the field now that we hold the lock: another thread may have created the
                    // shared parser since the unsynchronised check above, in which case we must use it
                    // rather than report that there is nothing left to read
                    returnParser = parser;
                    if (returnParser == null) {
                        SplittingReader splitter = source.getReader();
                        if (splitter != null) {
                            returnParser = new SyncParser(splitter, getIoSource(splitter), elementType);
                            parser = returnParser;
                        }
                    }
                }
            }
        }

        // 2. The ReaderSource advises us not to share the reader between threads
        // In this case, each thread will have its own parser and we need to ask for a new Reader each time we create one
        else if (!source.useMultipleThreadsPerReader()) {
            returnParser = threadedParser.get();

            boolean needNewReader = returnParser == null;
            if (!needNewReader) {
                try {
                    needNewReader = !returnParser.getReader().ready();
                } catch (IOException ex) {
                    // Unhelpfully if the stream has been closed beneath our feet this is how we find out about it
                    // Even more unhelpfully, it appears as though the SAXParser does exactly that when it's finished parsing
                    needNewReader = true;
                }
            }

            if (needNewReader) {
                SplittingReader splitter = source.getNextReader();
                if (splitter != null) {
                    // If we don't have a splitting pattern, pass the splitter directly to the iO source
                    // If we do, pass null as we'll create a new Reader for it below
                    returnParser = new Parser(splitter,
                            getIoSource(this.elementStartPattern == null ? splitter : null), elementType);
                    threadedParser.set(returnParser);
                }
            }

        }
        // 3. We have a splitter pattern and the Reader source advises us to share the Reader between threads
        // In this case each thread will have its own parser but we'll share a reader and keep using it until it runs out
        else {
            returnParser = threadedParser.get();
            if (returnParser == null) {
                SplittingReader splitter = source.getReader();
                if (splitter != null) {
                    returnParser = new Parser(splitter, getIoSource(null), elementType);
                    threadedParser.set(returnParser);
                }
            }
        }

        return returnParser;
    }

    /**
     * Reads, optionally validates and returns the next item.
     * Parsers are requested and consumed until one yields a CDO or none is left. If validation is
     * switched on the CDO is validated, and finally any registered ParseListener is given the chance
     * to transform the value returned.
     *
     * @return The next item, or null when there is no more data to read
     * @throws ParseException If a CDO cannot be parsed from the source
     * @throws NonTransientResourceException If the underlying reader fails while an element is extracted
     * @see org.springframework.batch.item.ItemReader#read()
     */
    @SuppressWarnings("unchecked")
    @Override
    public Result read() throws UnexpectedInputException, ParseException, NonTransientResourceException {

        ComplexDataObject result = null;
        Object context = null;
        Parser parser = null;

        // Keep trying to parse an entity until either we get one (result != null) or we run out of data to read (parser == null)
        // BufferedReaderSources such as the ZipFileSource can return multiple BufferedReaders; when our current one is exhausted it
        // will return another one
        while (result == null && (parser = getParser()) != null) {

            if (elementStartPattern != null) {

                // We're possibly sharing a BufferedReader with other threads. Get our data out of it as quickly as we can to reduce
                // the amount of time we spend blocking others
                SplittingReader reader = parser.getSplitter();
                if (reader == null) {
                    // There's nothing left to read
                    break;
                }

                // Get the textual source for an element from the reader
                ElementContext elementContext = readElement(reader);
                String element = elementContext.element;
                context = elementContext.context;

                // If we got something then parse it
                if (element != null && element.trim().length() > 0) {

                    parser.setReader(new StringReader(element));

                    try {
                        result = parser.read();
                    } catch (IOException ioEx) {
                        throw new ParseException(
                                "Failed to parse CDO from " + source.getName() + ". Message: " + element, ioEx);
                    }
                } else {
                    // This parser has been exhausted
                    discardParser(parser);
                }

            } else {
                // We'll parse CDOs from the parser in serial
                try {
                    result = parser.read();
                } catch (IOException ioEx) {
                    throw new ParseException("Failed to parse CDO from " + source.getName(), ioEx);
                } finally {
                    if (result != null && result.getTotalAttrCount() == 0 && result.getTotalElementCount() == 0) {
                        // We didn't manage to read anything
                        result = null;
                    }
                    if (result == null) {
                        // We've exhausted this reader
                        // In the event of an exception being thrown there might still be data left in the reader
                        // but as we have no way to skip to the next message, we have to abandon it
                        discardParser(parser);
                    }
                }
            }
        }

        // Take a single snapshot of the field so that validation being switched off concurrently
        // cannot leave us dereferencing null between the check and the use
        ThreadLocal<ValidationManager> validationManagers = validator;
        if (validationManagers != null && result != null) {
            validateResult(result, validationManagers);
        }

        // If we have a ParseListener registered, allow it to intercept the return value
        return parseListener == null || result == null ? (Result) result : parseListener.process(result, context);

    }

    /**
     * Validates a parsed CDO using the calling thread's ValidationManager, creating and caching one if
     * this thread does not have one yet.
     * In failfast mode the manager validates by exception. Otherwise all failure events are collected:
     * a single failure is re-fired through the manager as though validating by exception, several
     * failures are reported together in a C24CompoundValidationException.
     *
     * @param result The CDO to validate
     * @param validationManagers The per-thread holder of ValidationManagers
     * @throws C24ValidationException Wrapping any ValidationException raised during validation
     */
    private void validateResult(ComplexDataObject result, ThreadLocal<ValidationManager> validationManagers) {
        try {
            ValidationManager mgr = validationManagers.get();
            if (mgr == null) {
                mgr = new ValidationManager();
                validationManagers.set(mgr);
            }

            if (failfast) {
                mgr.validateByException(result);
            } else {
                // Capture all failures
                final Collection<ValidationEvent> events = new LinkedList<ValidationEvent>();

                ValidationListener listener = new ValidationListener() {
                    @Override
                    public void validationPassed(ValidationEvent ve) {
                        // Only failures are of interest
                    }

                    @Override
                    public void validationFailed(ValidationEvent ve) {
                        events.add(ve);
                    }
                };

                mgr.addValidationListener(listener);

                try {
                    if (!mgr.validateByEvents(result)) {
                        if (events.size() == 1) {
                            // Treat it as though we were validating by exception
                            mgr.setEventBased(false);
                            mgr.fireValidationEvent(events.iterator().next());
                        } else {
                            throw new C24CompoundValidationException(result, events);
                        }
                    }
                } finally {
                    // The manager is cached per thread, so the per-call listener must always be removed
                    mgr.removeValidationListener(listener);
                }
            }
        } catch (ValidationException vEx) {
            throw new C24ValidationException(
                    "Failed to validate message: " + vEx.getLocalizedMessage() + " [" + source.getName() + "]",
                    result, vEx);
        }
    }

}