Java tutorial
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.metamx.common.parsers;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Charsets;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
import com.jayway.jsonpath.spi.json.JacksonJsonProvider;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import com.metamx.common.Pair;
import com.metamx.common.StringUtils;

import java.math.BigInteger;
import java.nio.charset.CharsetEncoder;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * JSON parser class that uses the JsonPath library to access fields via path expressions.
 *
 * <p>Each configured {@link FieldSpec} is resolved against the parsed document either by a direct lookup in the
 * document root ({@link FieldType#ROOT}) or by evaluating a compiled JsonPath expression ({@link FieldType#PATH}).
 * Optionally, root-level fields that were not explicitly configured are discovered and added as well.
 */
public class JSONPathParser implements Parser<String, Object>
{
  private final Map<String, Pair<FieldType, JsonPath>> fieldPathMap;
  private final List<FieldSpec> fieldSpecs;
  private final boolean useFieldDiscovery;
  private final ObjectMapper mapper;
  // NOTE(review): the JDK documents CharsetEncoder as not safe for concurrent use, and this parser keeps a single
  // encoder per instance -- confirm that a parser instance is never shared between threads.
  private final CharsetEncoder enc = Charsets.UTF_8.newEncoder();
  private final Configuration jsonPathConfig;

  /**
   * Constructor
   *
   * @param fieldSpecs        List of field specifications.
   * @param useFieldDiscovery If true, automatically add root fields seen in the JSON document to the parsed object
   *                          Map. Only fields that contain a singular value or flat list (list containing no
   *                          subobjects or lists) are automatically added.
   * @param mapper            Optionally provide an ObjectMapper, used by the parser for reading the input JSON.
   *                          A new default ObjectMapper is created when this is null.
   *
   * @throws IllegalArgumentException if two field specifications share the same name
   */
  public JSONPathParser(List<FieldSpec> fieldSpecs, boolean useFieldDiscovery, ObjectMapper mapper)
  {
    this.fieldSpecs = fieldSpecs;
    this.fieldPathMap = generateFieldPaths(fieldSpecs);
    this.useFieldDiscovery = useFieldDiscovery;
    this.mapper = mapper == null ? new ObjectMapper() : mapper;

    // Avoid using defaultConfiguration, as this depends on json-smart which we are excluding.
    this.jsonPathConfig = Configuration.builder()
                                       .jsonProvider(new JacksonJsonProvider())
                                       .mappingProvider(new JacksonMappingProvider())
                                       .options(EnumSet.of(Option.SUPPRESS_EXCEPTIONS))
                                       .build();
  }

  /**
   * Not supported by this parser; field names are derived from the field specifications and the parsed documents.
   *
   * @return always null
   */
  @Override
  public List<String> getFieldNames()
  {
    return null;
  }

  /**
   * No-op: this parser ignores externally supplied field names.
   *
   * @param fieldNames ignored
   */
  @Override
  public void setFieldNames(Iterable<String> fieldNames)
  {
  }

  /**
   * Parses a JSON document into a map of field names to converted values. Fields whose lookup yields null are
   * omitted from the result.
   *
   * @param input JSON string. The root must be a JSON object, not an array.
   *              e.g., {"valid": "true"} and {"valid":[1,2,3]} are supported
   *              but [{"invalid": "true"}] and [1,2,3] are not.
   *
   * @return A map of field names and values
   *
   * @throws ParseException if any exception is raised while reading or extracting from the input
   */
  @Override
  public Map<String, Object> parse(String input)
  {
    try {
      Map<String, Object> map = new LinkedHashMap<>();
      Map<String, Object> document = mapper.readValue(
          input,
          new TypeReference<Map<String, Object>>()
          {
          }
      );

      for (Map.Entry<String, Pair<FieldType, JsonPath>> entry : fieldPathMap.entrySet()) {
        String fieldName = entry.getKey();
        Pair<FieldType, JsonPath> pair = entry.getValue();

        Object parsedVal;
        if (pair.lhs == FieldType.ROOT) {
          // ROOT fields bypass JsonPath entirely and are read straight from the top-level map.
          parsedVal = document.get(fieldName);
        } else {
          parsedVal = pair.rhs.read(document, jsonPathConfig);
        }

        if (parsedVal == null) {
          continue;
        }
        map.put(fieldName, valueConversionFunction(parsedVal));
      }

      if (useFieldDiscovery) {
        discoverFields(map, document);
      }
      return map;
    }
    catch (Exception e) {
      throw new ParseException(e, "Unable to parse row [%s]", input);
    }
  }

  /**
   * Builds the lookup table from field name to (type, compiled path), preserving the order of the specifications.
   *
   * <p>The JsonPath expression is not compiled for ROOT fields: {@link #parse(String)} never evaluates a path for
   * them, so a ROOT specification may carry a null or empty expression.
   *
   * @param fieldSpecs field specifications to index
   *
   * @return insertion-ordered map keyed by field name; the path component is null for ROOT fields
   *
   * @throws IllegalArgumentException if a field name occurs more than once
   */
  private Map<String, Pair<FieldType, JsonPath>> generateFieldPaths(List<FieldSpec> fieldSpecs)
  {
    Map<String, Pair<FieldType, JsonPath>> map = new LinkedHashMap<>();
    for (FieldSpec fieldSpec : fieldSpecs) {
      String fieldName = fieldSpec.getName();
      if (map.containsKey(fieldName)) {
        throw new IllegalArgumentException("Cannot have duplicate field definition: " + fieldName);
      }
      FieldType type = fieldSpec.getType();
      // Anything that is not explicitly ROOT is evaluated through JsonPath by parse(), so it needs a compiled path.
      JsonPath path = type == FieldType.ROOT ? null : JsonPath.compile(fieldSpec.getExpr());
      map.put(fieldName, new Pair<>(type, path));
    }
    return map;
  }

  /**
   * Adds root-level document fields that are not already present in the result map. Null values, nested objects,
   * and lists containing objects or lists are skipped.
   *
   * @param map      result map to add discovered fields to
   * @param document parsed JSON document root
   */
  private void discoverFields(Map<String, Object> map, Map<String, Object> document)
  {
    for (Map.Entry<String, Object> entry : document.entrySet()) {
      String field = entry.getKey();
      if (map.containsKey(field)) {
        continue;
      }

      Object val = entry.getValue();
      if (val == null || val instanceof Map) {
        continue;
      }
      if (val instanceof List && !isFlatList((List<?>) val)) {
        continue;
      }

      map.put(field, valueConversionFunction(val));
    }
  }

  /**
   * Normalizes a parsed JSON value: Integer becomes Long, BigInteger becomes Double, Strings are passed through
   * {@link #charsetFix(String)}, and Lists and Maps are copied with their elements converted recursively.
   * Any other value is returned unchanged.
   *
   * @param val value to convert
   *
   * @return the converted value
   */
  private Object valueConversionFunction(Object val)
  {
    if (val instanceof Integer) {
      return Long.valueOf((Integer) val);
    }

    if (val instanceof BigInteger) {
      return Double.valueOf(((BigInteger) val).doubleValue());
    }

    if (val instanceof String) {
      return charsetFix((String) val);
    }

    if (val instanceof List) {
      List<?> valList = (List<?>) val;
      List<Object> newList = new ArrayList<>(valList.size());
      for (Object entry : valList) {
        newList.add(valueConversionFunction(entry));
      }
      return newList;
    }

    if (val instanceof Map) {
      // Values reaching this point originate from a JSON document, where object keys are strings.
      @SuppressWarnings("unchecked")
      Map<String, Object> valMap = (Map<String, Object>) val;
      Map<String, Object> newMap = new LinkedHashMap<>();
      for (Map.Entry<String, Object> entry : valMap.entrySet()) {
        newMap.put(entry.getKey(), valueConversionFunction(entry.getValue()));
      }
      return newMap;
    }

    return val;
  }

  /**
   * Round-trips a string through UTF-8 when the encoder reports that it cannot be encoded as-is.
   *
   * @param s string to check, may be null
   *
   * @return the input string, or its UTF-8 round-tripped form when it was not encodable
   */
  private String charsetFix(String s)
  {
    if (s != null && !enc.canEncode(s)) {
      // Some whacky characters are in this string (e.g. \uD900). These are problematic because they are decodeable
      // by new String(...) but will not encode into the same character. This dance here will replace these
      // characters with something more sane.
      return StringUtils.fromUtf8(StringUtils.toUtf8(s));
    } else {
      return s;
    }
  }

  /**
   * Checks whether a list contains no nested objects or lists.
   *
   * @param list list to inspect
   *
   * @return false if any element is a Map or a List, true otherwise
   */
  private boolean isFlatList(List<?> list)
  {
    for (Object obj : list) {
      if ((obj instanceof Map) || (obj instanceof List)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Specifies access behavior for a field.
   */
  public enum FieldType
  {
    /**
     * A ROOT field is read directly from the JSON document root without using the JsonPath library.
     */
    ROOT,

    /**
     * A PATH field uses a JsonPath expression to retrieve the field value
     */
    PATH;
  }

  /**
   * Specifies a field to be added to the parsed object Map, using JsonPath notation.
   *
   * See <a href="https://github.com/jayway/JsonPath">https://github.com/jayway/JsonPath</a> for more information.
   */
  public static class FieldSpec
  {
    private final FieldType type;
    private final String name;
    private final String expr;

    /**
     * Constructor
     *
     * @param type Specifies how this field should be retrieved.
     * @param name Name of the field, used as the key in the Object map returned by the parser.
     *             For ROOT fields, this must match the field name as it appears in the JSON document.
     * @param expr Only used by PATH type fields, specifies the JsonPath expression used to access the field.
     */
    public FieldSpec(FieldType type, String name, String expr)
    {
      this.type = type;
      this.name = name;
      this.expr = expr;
    }

    /**
     * @return how this field is retrieved from the document
     */
    public FieldType getType()
    {
      return type;
    }

    /**
     * @return the key under which the field is stored in the parsed map
     */
    public String getName()
    {
      return name;
    }

    /**
     * @return the JsonPath expression for this field
     */
    public String getExpr()
    {
      return expr;
    }
  }
}