Java tutorial
/** * Copyright (c) 2014, Cloudera, Inc. All Rights Reserved. * * Cloudera, Inc. licenses this file to you under the Apache License, * Version 2.0 (the "License"). You may not use this file except in * compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for * the specific language governing permissions and limitations under the * License. */ package com.cloudera.oryx.contrib.flume; import java.util.List; import org.apache.commons.lang.StringEscapeUtils; import org.apache.flume.Event; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.PathNotFoundException; import com.jayway.jsonpath.ReadContext; /** * <p> * An {@link OryxEventParser} implementation for parsing JSON formatted {@link Event} bodies. * </p> * <p> * {@link JsonPath} is used to extract the <tt>oryxFields</tt> from the JSON objects. To configure * this parser <tt>oryxFields</tt> should be given as JSON paths. For example, to extract * <tt>user-id</tt> and <tt>product-code</tt> from the following JSON object: * </p> * <p> * <code> * { * "user": { * "id": "fd156752df3d", * "email": "kinley@cloudera.com" * }, * "product": { * "group": "running shoes", * "product-name": "Inov-8 Road-X Lite", * "product-code": "488589e7c994", * "search-terms": "natural running" * } * } * </code> * </p> * <p> * <tt>oryxFields</tt> should be defined in the Flume configuration as follows: * </p> * <p> * <code> * AgentName.sinks.SinkName.oryxFields = $.user.id, $.product.product-code, 1.0 * </code> * </p> * See https://code.google.com/p/json-path for more information. */ public class OryxJSONEventParser implements OryxEventParser { private static final Logger log = LoggerFactory.getLogger(OryxJSONEventParser.class); public OryxJSONEventParser() { } /** {@inheritDoc} */ @Override public void parseEvent(Event event, List<List<String>> fields, List<String> batch) { ReadContext context = JsonPath.parse(new String(event.getBody())); for (List<String> keys : fields) { StringBuilder record = new StringBuilder(); // add user and item for (int i = 0; i < 2; i++) { String val = null; try { val = context.read(keys.get(i)); } catch (Exception ex) { if (ex instanceof PathNotFoundException) { log.error("Unable to find key '{}'", keys.get(i), ex); } else if (ex instanceof ClassCastException) { log.error("Unable to cast value for key '{}'", keys.get(i), ex); } if (log.isDebugEnabled()) { log.debug("Skipping record '{}'", new String(event.getBody())); } record.setLength(0); break; } record.append(StringEscapeUtils.escapeCsv(val) + ","); } if (record.length() > 0) { if (keys.size() == 3) { // add strength record.append(keys.get(2)); } else { record.deleteCharAt(record.length() - 1); } if (log.isDebugEnabled()) { log.debug("Adding record '{}' to batch", record.toString()); } batch.add(record.toString()); } } } }