org.eclipse.smila.processing.pipelets.xmlprocessing.XmlSplitterPipelet.java Source code

Java tutorial

Introduction

Here is the source code for org.eclipse.smila.processing.pipelets.xmlprocessing.XmlSplitterPipelet.java

Source

/***********************************************************************************************************************
 * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Daniel Stucky (empolis GmbH) - initial API and implementation
 *               Andreas Weber (Attensity Europe GmbH) - data model simplification
 **********************************************************************************************************************/

package org.eclipse.smila.processing.pipelets.xmlprocessing;

import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.connectivity.ConnectivityException;
import org.eclipse.smila.connectivity.ConnectivityManager;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactoryCreator;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.processing.JMSMessageProperties;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.pipelets.ATransformationPipelet;
import org.eclipse.smila.utils.service.ServiceUtils;
import org.eclipse.smila.utils.xml.stax.MarkerTag;
import org.eclipse.smila.utils.xml.stax.SimpleTagExtractor;
import org.eclipse.smila.utils.xml.stax.XmlSnippetHandler;
import org.eclipse.smila.utils.xml.stax.XmlSnippetSplitter;

/**
 * Pipelet that splits an XML stream into multiple XML snippets. For each snippet a new Record is created where the XML
 * snippet is stored in either an attribute or an attachment. The created records are not returned as a PipeletResult
 * (the result is just the incoming record ids) but are sent directly to the ConnectivityManager and are thereby
 * routed once more to the Queue.
 * 
 * On each created record the Annotation <tt>MessageProperties</tt> is set with the key value pair
 * <tt>isXmlSnippet</tt>=<tt>true</tt>. This can be used in Listener rules to select XML snippets for processing. The
 * possible properties are:
 * <ul>
 * <li>beginTagName: the name of the tag to start the xml snippet</li>
 * <li>isBeginClosingTag: boolean flag if the beginTagName is a closing tag (true) or not (false)</li>
 * <li>endTagName: the name of the tag to end the xml snippet</li>
 * <li>isEndClosingTag: boolean flag if the endTagName is a closing tag (true) or not (false)</li>
 * <li>keyTagName: the name of the tag used to create a record id</li>
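 * <li>maxBufferSize: the maximum size of the internal record buffer (optional, default is 20)</li>
 * <li>idSeparator: the separator placed between the id of the processed record and the key value when creating the
 * id of a snippet record (optional, default is "#")</li>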
 * <li>inputName: name of the Attribute/Attachment to read the XML Document from.</li>
 * <li>outputName: name of the Attribute/Attachment to store the extracted value in</li>
 * <li>inputType: the type (Attribute or Attachment) of the inputName. An input Attribute is not interpreted as content
 * but as a file path or a URL to the XML document</li>
 * <li>outputType: the type (Attribute or Attachment) of the outputName</li>
 * </ul>
 */
public class XmlSplitterPipelet extends ATransformationPipelet {

    /** Name of the mandatory configuration property holding the tag name that starts an xml snippet. */
    public static final String PROP_BEGIN_TAG_NAME = "beginTagName";

    /** Name of the mandatory boolean configuration property telling whether the begin tag is a closing tag. */
    public static final String PROP_IS_BEGIN_CLOSING_TAG = "isBeginClosingTag";

    /** Name of the mandatory configuration property holding the tag name that ends an xml snippet. */
    public static final String PROP_END_TAG_NAME = "endTagName";

    /** Name of the mandatory boolean configuration property telling whether the end tag is a closing tag. */
    public static final String PROP_IS_END_CLOSING_TAG = "isEndClosingTag";

    /** Name of the mandatory configuration property holding the tag whose value becomes part of a snippet record id. */
    public static final String PROP_KEY_TAG_NAME = "keyTagName";

    /** Name of the optional configuration property holding the maximum record buffer size. */
    public static final String PROP_MAX_BUFFER_SIZE = "maxBufferSize";

    /** Name of the optional configuration property holding the id separator. */
    public static final String PROP_ID_SEPARATOR = "idSeparator";

    /** Id separator used when the property idSeparator is not configured. */
    public static final String DEFAULT_ID_SEPARATOR = "#";

    /** Max buffer size (20) used when the property maxBufferSize is not configured. */
    public static final int DEFAULT_MAX_BUFFER_SIZE = 20;

    /** The MarkerTag for the snippet begin, created in configure(). */
    private MarkerTag _beginTag;

    /** The MarkerTag for the snippet end, created in configure(). */
    private MarkerTag _endTag;

    /** The xml tag name containing the value used to generate new record ids. */
    private String _keyTagName;

    /** The separator placed between the processed record's id and the key value in the ids of the split records. */
    private String _idSeparator;

    /**
     * SimpleTagExtractor instance to extract key values from the snippets.
     * NOTE(review): the meaning of the constructor flag (true) is not visible here - confirm against SimpleTagExtractor.
     */
    private SimpleTagExtractor _extractor = new SimpleTagExtractor(true);

    /** Reference to the ConnectivityManager, looked up lazily on first use. */
    private ConnectivityManager _connectivityManager;

    /**
     * The record buffer used to buffer created records before sending them in blocks to ConnectivityManager. The list
     * itself is used as the monitor for all buffer accesses.
     */
    private List<Record> _recordBuffer = new ArrayList<Record>();

    /** The max buffer size. If reached the buffer is flushed. */
    private int _maxBufferSize = DEFAULT_MAX_BUFFER_SIZE;

    /**
     * {@inheritDoc}
     * <p>
     * In addition to the configuration handled by the super class this reads the splitter properties: the begin and
     * end marker tags (name plus closing-tag flag, all mandatory), the key tag name (mandatory), the id separator
     * (optional, defaults to {@link #DEFAULT_ID_SEPARATOR}) and the max buffer size (optional, keeps its current
     * value if not configured).
     * 
     * @param configuration
     *          the pipelet configuration
     * @throws ProcessingException
     *           if a mandatory property is missing or empty, or if maxBufferSize is not a valid integer
     */
    @Override
    public void configure(AnyMap configuration) throws ProcessingException {
        super.configure(configuration);
        final String beginTagName = configuration.getStringValue(PROP_BEGIN_TAG_NAME);
        if (beginTagName == null || beginTagName.trim().length() == 0) {
            throw new ProcessingException(
                    "Property " + PROP_BEGIN_TAG_NAME + " must not be <null> or an empty String");
        }
        final Boolean isBeginEndTag = configuration.getBooleanValue(PROP_IS_BEGIN_CLOSING_TAG);
        if (isBeginEndTag == null) {
            throw new ProcessingException("Property " + PROP_IS_BEGIN_CLOSING_TAG + " must not be <null>");
        }

        final String endTagName = configuration.getStringValue(PROP_END_TAG_NAME);
        if (endTagName == null || endTagName.trim().length() == 0) {
            throw new ProcessingException(
                    "Property " + PROP_END_TAG_NAME + " must not be <null> or an empty String");
        }
        final Boolean isEndEndTag = configuration.getBooleanValue(PROP_IS_END_CLOSING_TAG);
        if (isEndEndTag == null) {
            throw new ProcessingException("Property " + PROP_IS_END_CLOSING_TAG + " must not be <null>");
        }

        _keyTagName = configuration.getStringValue(PROP_KEY_TAG_NAME);
        if (_keyTagName == null || _keyTagName.trim().length() == 0) {
            throw new ProcessingException(
                    "Property " + PROP_KEY_TAG_NAME + " must not be <null> or an empty String");
        }

        _idSeparator = configuration.getStringValue(PROP_ID_SEPARATOR);
        if (_idSeparator == null) {
            _idSeparator = DEFAULT_ID_SEPARATOR;
        }

        final String bufferSize = configuration.getStringValue(PROP_MAX_BUFFER_SIZE);
        if (bufferSize != null) {
            try {
                _maxBufferSize = Integer.parseInt(bufferSize.trim());
            } catch (NumberFormatException e) {
                // report a malformed number the same way as every other invalid property instead of leaking an
                // unchecked exception out of configure()
                throw new ProcessingException(
                        "Property " + PROP_MAX_BUFFER_SIZE + " must be an integer but was '" + bufferSize + "'", e);
            }
        }

        _beginTag = new MarkerTag(beginTagName, isBeginEndTag);
        _endTag = new MarkerTag(endTagName, isEndEndTag);
    }

    /**
     * {@inheritDoc}
     * <p>
     * For every given record the XML input (an attachment, or the file/URL named by an attribute) is split into
     * snippets; the snippet records are buffered and sent to the ConnectivityManager. A record that cannot be split is
     * logged and skipped so that the remaining records are still processed. After the last record the remaining
     * buffer content is flushed.
     * 
     * @param blackboard
     *          the blackboard holding the records
     * @param recordIds
     *          the ids of the records to split, may be <code>null</code>
     * @return the unchanged incoming record ids
     * @throws ProcessingException
     *           if the final flush of the record buffer fails
     */
    public String[] process(Blackboard blackboard, String[] recordIds) throws ProcessingException {
        final InternalHandler snippetHandler = new InternalHandler();
        final XmlSnippetSplitter splitter = new XmlSnippetSplitter(snippetHandler, _beginTag, _endTag);
        if (recordIds != null) {
            for (final String id : recordIds) {
                InputStream inputStream = null;
                try {
                    // get xml input stream
                    if (isReadFromAttribute()) {
                        inputStream = loadExternalInputStream(readStringInput(blackboard, id));
                    } else {
                        inputStream = blackboard.getAttachmentAsStream(id, getInputName());
                    }

                    snippetHandler.setCurrentRecord(id, blackboard.getRecord(id).getSource());
                    splitter.read(inputStream);
                    if (_log.isInfoEnabled()) {
                        _log.info("Created " + snippetHandler.getRecordCount() + " records from processing record "
                                + id);
                    }
                } catch (Exception e) {
                    if (_log.isWarnEnabled()) {
                        _log.warn("unable to split record " + id, e);
                    }
                } finally {
                    // always release the file handle / connection / attachment stream; closing a stream that was
                    // already closed has no effect
                    if (inputStream != null) {
                        try {
                            inputStream.close();
                        } catch (IOException e) {
                            if (_log.isWarnEnabled()) {
                                _log.warn("unable to close input stream of record " + id, e);
                            }
                        }
                    }
                }
            } // for
            try {
                flushRecordBuffer();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // keep the interrupt visible to callers although it is reported as ProcessingException
                    Thread.currentThread().interrupt();
                }
                throw new ProcessingException("error flushing record buffer", e);
            }
        } // if
        return recordIds;
    }

    /**
     * Returns the ConnectivityManager, looking it up via ServiceUtils on the first call and reusing the cached
     * reference afterwards.
     * 
     * @return the ConnectivityManager.
     * @throws InterruptedException
     *           if the service lookup is interrupted
     */
    private ConnectivityManager getConnectivityManager() throws InterruptedException {
        if (_connectivityManager != null) {
            return _connectivityManager;
        }
        _connectivityManager = ServiceUtils.getService(ConnectivityManager.class);
        return _connectivityManager;
    }

    /**
     * Appends the given record to the record buffer and flushes the buffer as soon as it holds at least
     * _maxBufferSize records. The buffer is locked for the whole operation.
     * 
     * @param record
     *          the Record to add to the buffer
     * @throws ConnectivityException
     *           if flushing the buffer fails
     * @throws InterruptedException
     *           if flushing the buffer is interrupted
     */
    private void addToRecordBuffer(final Record record) throws ConnectivityException, InterruptedException {
        synchronized (_recordBuffer) {
            _recordBuffer.add(record);
            final boolean limitReached = _recordBuffer.size() >= _maxBufferSize;
            if (!limitReached) {
                return;
            }
            flushRecordBuffer();
        }
    }

    /**
     * Sends all buffered records to the ConnectivityManager. Does nothing if the buffer is empty. The buffer is
     * cleared afterwards even if sending fails.
     * 
     * @throws ConnectivityException
     *           if the ConnectivityManager rejects the records
     * @throws InterruptedException
     *           if obtaining the ConnectivityManager is interrupted
     */
    private void flushRecordBuffer() throws ConnectivityException, InterruptedException {
        synchronized (_recordBuffer) {
            if (_recordBuffer.isEmpty()) {
                return;
            }
            try {
                final ConnectivityManager manager = getConnectivityManager();
                final Record[] pending = _recordBuffer.toArray(new Record[_recordBuffer.size()]);
                manager.add(pending);
            } finally {
                // the buffer is emptied no matter whether the records could be delivered
                _recordBuffer.clear();
            }
        }
    }

    /**
     * Opens an InputStream for the given attribute value, which is interpreted as
     * <ul>
     * <li>a file URL if it starts with the scheme <tt>file:</tt>,</li>
     * <li>an HTTP(S) URL if it starts with the scheme <tt>http:</tt> or <tt>https:</tt>,</li>
     * <li>a plain file system path otherwise.</li>
     * </ul>
     * The caller is responsible for closing the returned stream.
     * 
     * @param attrtibuteValue
     *          the attrtibuteValue denoting an URL or file path
     * @return a InputStream, or null if the value is null or blank or the HTTP response has no body
     * @throws IOException
     *           if the URL is malformed, the file cannot be opened, the HTTP request fails or the server answers with
     *           a non-2xx status
     */
    private InputStream loadExternalInputStream(final String attrtibuteValue) throws IOException {
        if (attrtibuteValue == null || attrtibuteValue.trim().length() == 0) {
            return null;
        }

        // match on the complete scheme (including ':') so that plain paths like "files/a.xml" or "httpdocs/a.xml"
        // are not mistaken for URLs
        if (attrtibuteValue.regionMatches(true, 0, "file:", 0, "file:".length())) {
            final URL url = new URL(attrtibuteValue);
            // the authority is kept to support values like file://C:/dir/a.xml; it is null for file:/dir/a.xml and
            // must not end up as the literal text "null" in the path
            final String authority = url.getAuthority();
            return new FileInputStream((authority == null ? "" : authority) + url.getPath());
        }

        if (attrtibuteValue.regionMatches(true, 0, "http:", 0, "http:".length())
                || attrtibuteValue.regionMatches(true, 0, "https:", 0, "https:".length())) {
            final URL url = new URL(attrtibuteValue);
            final HttpClient httpClient = new HttpClient();
            final GetMethod getMethod = new GetMethod(url.toString());
            InputStream body = null;
            try {
                final int status = httpClient.executeMethod(getMethod);
                if (status < 200 || status >= 300) {
                    // do not feed an error page into the XML splitter
                    throw new IOException("HTTP request for " + url + " failed with status " + status);
                }
                body = getMethod.getResponseBodyAsStream();
                return body;
            } finally {
                if (body == null) {
                    // nothing is handed to the caller (failure or empty response), so give the connection back here
                    // NOTE(review): for the success case closing the returned stream is expected to release the
                    // connection - confirm against the commons-httpclient documentation
                    getMethod.releaseConnection();
                }
            }
        }

        return new FileInputStream(attrtibuteValue);
    }

    /**
     * Internal XmlSnippetHandler implementation that turns each xml snippet into a record (id built from the current
     * record id, the id separator and the first key tag value) and adds it to the record buffer of the enclosing
     * pipelet.
     */
    class InternalHandler implements XmlSnippetHandler {

        /** The currently processed Id. used to generate fragment id objects. */
        private String _currentId;

        /** The currently processed source. used to generate fragment record snippet. */
        private String _source;

        /** Counts the total number of created records over all processed ids; never reset. */
        private int _recordCounter;

        /** Counts the number of invokes of handleSnippet() for the _currentId. */
        private int _countById;

        /**
         * Set the current record id and source used for snippet record creation and restart the per-id snippet
         * counter.
         * 
         * @param id
         *          the current Id.
         * @param source
         *          the current source.
         */
        void setCurrentRecord(final String id, final String source) {
            _currentId = id;
            _source = source;
            _countById = 0;
        }

        /**
         * Returns the number of records created by this handler so far, summed over all record ids it was used for.
         * 
         * @return the number of created records
         */
        int getRecordCount() {
            return _recordCounter;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Creates a record for the snippet and adds it to the record buffer. Snippets without the key tag are logged
         * and skipped. Errors are logged and do not abort the splitting; if the error is an InterruptedException the
         * interrupt status of the thread is restored.
         */
        public void handleSnippet(final byte[] snippet) {
            _countById++;
            String snippetId = null;
            try {
                final List<String> keys = _extractor.getTags(_keyTagName, new ByteArrayInputStream(snippet));
                if (!keys.isEmpty()) {
                    // only the first key value is used for the id
                    snippetId = _currentId + _idSeparator + keys.get(0);
                    final Record record = DataFactoryCreator.createDefaultFactory().createRecord(snippetId,
                            _source);
                    if (isStoreInAttribute()) {
                        record.getMetadata().put(_outputName, new String(snippet, ENCODING_ATTACHMENT));
                    } else {
                        record.setAttachment(_outputName, snippet);
                    }

                    // set message properties
                    final AnyMap messageProperties = DataFactoryCreator.createDefaultFactory().createAnyMap();
                    messageProperties.put(JMSMessageProperties.PROPERTY_IS_XML_SNIPPET, Boolean.toString(true));
                    record.getMetadata().put(JMSMessageProperties.MESSAGE_PROPERTIES, messageProperties);

                    _recordCounter++;
                    addToRecordBuffer(record);
                } else {
                    if (_log.isWarnEnabled()) {
                        _log.warn("could not find tag " + _keyTagName + " in snippet number " + _countById
                                + " of record " + _currentId);
                    }
                    if (_log.isTraceEnabled()) {
                        _log.trace("snippet content: " + new String(snippet));
                    }
                }
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // the handler interface offers no way to propagate the interruption, so at least keep the
                    // interrupt status instead of silently discarding it
                    Thread.currentThread().interrupt();
                }
                if (_log.isErrorEnabled()) {
                    _log.error("error creating record for xml snippet number " + _countById + " with id "
                            + snippetId + " of record " + _currentId, e);
                }
                if (_log.isTraceEnabled()) {
                    _log.trace("snippet content: " + new String(snippet));
                }
            }
        }
    } // InternalHandler
}