com.cloudera.oryx.contrib.flume.OryxJSONEventParser.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.oryx.contrib.flume.OryxJSONEventParser.java

Source

/**
 * Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */

package com.cloudera.oryx.contrib.flume;

import java.util.List;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.flume.Event;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.PathNotFoundException;
import com.jayway.jsonpath.ReadContext;

/**
 * <p>
 * An {@link OryxEventParser} implementation for parsing JSON formatted {@link Event} bodies.
 * </p>
 * <p>
 * {@link JsonPath} is used to extract the <tt>oryxFields</tt> from the JSON objects. To configure
 * this parser <tt>oryxFields</tt> should be given as JSON paths. For example, to extract
 * <tt>user-id</tt> and <tt>product-code</tt> from the following JSON object:
 * </p>
 * <p>
 * <code>
 * {
 *   "user": {
 *     "id": "fd156752df3d",
 *     "email": "kinley@cloudera.com"
 *   },
 *   "product": {
 *     "group": "running shoes",
 *     "product-name": "Inov-8 Road-X Lite",
 *     "product-code": "488589e7c994",
 *     "search-terms": "natural running"
 *   }
 * } 
 * </code>
 * </p>
 * <p>
 * <tt>oryxFields</tt> should be defined in the Flume configuration as follows:
 * </p>
 * <p>
 * <code>
 * AgentName.sinks.SinkName.oryxFields = $.user.id, $.product.product-code, 1.0
 * </code>
 * </p>
 * See https://code.google.com/p/json-path for more information.
 */
public class OryxJSONEventParser implements OryxEventParser {
    private static final Logger log = LoggerFactory.getLogger(OryxJSONEventParser.class);

    public OryxJSONEventParser() {
    }

    /** {@inheritDoc} */
    @Override
    public void parseEvent(Event event, List<List<String>> fields, List<String> batch) {
        ReadContext context = JsonPath.parse(new String(event.getBody()));

        for (List<String> keys : fields) {
            StringBuilder record = new StringBuilder();
            // add user and item
            for (int i = 0; i < 2; i++) {
                String val = null;
                try {
                    val = context.read(keys.get(i));
                } catch (Exception ex) {
                    if (ex instanceof PathNotFoundException) {
                        log.error("Unable to find key '{}'", keys.get(i), ex);
                    } else if (ex instanceof ClassCastException) {
                        log.error("Unable to cast value for key '{}'", keys.get(i), ex);
                    }
                    if (log.isDebugEnabled()) {
                        log.debug("Skipping record '{}'", new String(event.getBody()));
                    }
                    record.setLength(0);
                    break;
                }
                record.append(StringEscapeUtils.escapeCsv(val) + ",");
            }

            if (record.length() > 0) {
                if (keys.size() == 3) {
                    // add strength
                    record.append(keys.get(2));
                } else {
                    record.deleteCharAt(record.length() - 1);
                }
                if (log.isDebugEnabled()) {
                    log.debug("Adding record '{}' to batch", record.toString());
                }
                batch.add(record.toString());
            }
        }
    }

}