Java tutorial
package com.adintellig;

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.io.Text;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Extracts a value from a JSON document using a restricted JSONPath
 * expression and returns it as a Hadoop {@link Text}.
 *
 * <p>Parsed documents and per-path-segment parsing results are memoised in
 * small static caches. Those caches are plain {@link LinkedHashMap}s with no
 * synchronization, and {@link #evaluate(String, String)} reuses one
 * {@link Text} instance per {@code UDFJson} object, so neither the class nor
 * a single instance is safe for concurrent use.
 */
public class UDFJson {

  /** Captures the leading key of a path segment, e.g. {@code foo} in {@code foo[0][*]}. */
  private static final Pattern PATTERN_KEY = Pattern.compile("^([a-zA-Z0-9_\\-]+).*");

  /** Finds every subscript of a path segment: a decimal index or the wildcard {@code *}. */
  private static final Pattern PATTERN_INDEX = Pattern.compile("\\[([0-9]+|\\*)\\]");

  /**
   * A small bounded cache built on a linked hash map.
   *
   * <p>The map is created in insertion order (not access order), so once more
   * than {@code CACHE_SIZE} entries are present the entry that was inserted
   * first is evicted, regardless of how recently it was read.
   */
  static class HashCache<K, V> extends LinkedHashMap<K, V> {

    private static final int CACHE_SIZE = 16;
    private static final int INIT_SIZE = 32;
    private static final float LOAD_FACTOR = 0.6f;

    HashCache() {
      super(INIT_SIZE, LOAD_FACTOR);
    }

    private static final long serialVersionUID = 1;

    @Override
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
      return size() > CACHE_SIZE;
    }
  }

  /** JSON text -> parsed root object. */
  static Map<String, Object> extractObjectCache = new HashCache<String, Object>();
  /** Full path expression -> its segments split on {@code '.'}. */
  static Map<String, String[]> pathExprCache = new HashCache<String, String[]>();
  /** Path segment -> list of its subscripts, in order. */
  static Map<String, ArrayList<String>> indexListCache = new HashCache<String, ArrayList<String>>();
  /** Path segment -> key captured by the key pattern. */
  static Map<String, String> mKeyGroup1Cache = new HashCache<String, String>();
  /** Path segment -> whether it matches the key pattern at all. */
  static Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>();

  /** Reused output holder; overwritten by every successful {@link #evaluate} call. */
  Text result = new Text();

  public UDFJson() {
  }

  /**
   * Extract json object from a json string based on json path specified, and
   * return json string of the extracted json object. It will return null if
   * the input json string is invalid.
   *
   * <pre>
   * A limited version of JSONPath supported:
   *    $   : Root object
   *    .   : Child operator
   *    []  : Subscript operator for array
   *    *   : Wildcard for []
   *
   * Syntax not supported that's worth noticing:
   *    ''  : Zero length string as key
   *    ..  : Recursive descent
   *    &#064;   : Current object/element
   *    ()  : Script expression
   *    ?() : Filter (script) expression.
   *    [,] : Union operator
   *    [start:end:step] : array slice operator
   * </pre>
   *
   * <p>The document root is parsed with {@code new JSONObject(jsonString)}, so
   * a document whose root is not a JSON object yields null. The returned
   * {@link Text} is this instance's {@code result} field; copy it if it must
   * survive the next call.
   *
   * @param jsonString
   *          the json string.
   * @param pathString
   *          the json path expression.
   * @return json string or null when an error happens.
   */
  public Text evaluate(String jsonString, String pathString) {
    // isEmpty() rather than == "": reference comparison only catches the interned literal.
    if (jsonString == null || jsonString.isEmpty()
        || pathString == null || pathString.isEmpty()) {
      return null;
    }

    try {
      // Cache pathExpr
      String[] pathExpr = pathExprCache.get(pathString);
      if (pathExpr == null) {
        pathExpr = pathString.split("\\.", -1);
        pathExprCache.put(pathString, pathExpr);
      }

      if (!pathExpr[0].equalsIgnoreCase("$")) {
        return null;
      }

      // Cache extractObject
      Object extractObject = extractObjectCache.get(jsonString);
      if (extractObject == null) {
        extractObject = new JSONObject(jsonString);
        extractObjectCache.put(jsonString, extractObject);
      }

      for (int i = 1; i < pathExpr.length; i++) {
        extractObject = extract(extractObject, pathExpr[i]);
        if (extractObject == null) {
          // Nothing matched this segment; no later segment can match either.
          return null;
        }
      }

      result.set(extractObject.toString());
      return result;
    } catch (Exception e) {
      // Contract: any parse or navigation failure is reported as null.
      return null;
    }
  }

  /**
   * Applies one path segment (a key optionally followed by subscripts, such as
   * {@code items[0][*]}) to {@code json}.
   *
   * @param json the value selected so far
   * @param path one dot-separated segment of the path expression
   * @return the selected value, or null when the segment does not start with a
   *         valid key or selects nothing
   * @throws JSONException when the key is missing from a JSON object
   */
  private Object extract(Object json, String path) throws JSONException {
    // Cache patternkey.matcher(path).matches()
    Matcher mKey = null;
    Boolean mKeyMatches = mKeyMatchesCache.get(path);
    if (mKeyMatches == null) {
      mKey = PATTERN_KEY.matcher(path);
      mKeyMatches = Boolean.valueOf(mKey.matches());
      mKeyMatchesCache.put(path, mKeyMatches);
    }
    if (!mKeyMatches.booleanValue()) {
      return null;
    }

    // Cache mkey.group(1)
    String mKeyGroup1 = mKeyGroup1Cache.get(path);
    if (mKeyGroup1 == null) {
      if (mKey == null) {
        // The match flag was cached but the captured key was evicted. A fresh
        // Matcher must run matches() before group() may be called, otherwise
        // group() throws IllegalStateException.
        mKey = PATTERN_KEY.matcher(path);
        if (!mKey.matches()) {
          return null;
        }
      }
      mKeyGroup1 = mKey.group(1);
      mKeyGroup1Cache.put(path, mKeyGroup1);
    }
    json = extract_json_withkey(json, mKeyGroup1);

    // Cache indexList
    ArrayList<String> indexList = indexListCache.get(path);
    if (indexList == null) {
      Matcher mIndex = PATTERN_INDEX.matcher(path);
      indexList = new ArrayList<String>();
      while (mIndex.find()) {
        indexList.add(mIndex.group(1));
      }
      indexListCache.put(path, indexList);
    }

    if (indexList.size() > 0) {
      json = extract_json_withindex(json, indexList);
    }

    return json;
  }

  /** Working list of candidate values used by {@link #extract_json_withindex}. */
  ArrayList<Object> jsonList = new ArrayList<Object>();

  /**
   * Applies a sequence of array subscripts to {@code json}.
   *
   * <p>A wildcard replaces every candidate array by its elements; a numeric
   * subscript replaces every candidate array by the element at that position.
   * Candidates that are not arrays are dropped.
   *
   * @param json the value the subscripts are applied to
   * @param indexList subscripts in order, each a decimal index or {@code "*"}
   * @return the single remaining value, a {@link JSONArray} of all remaining
   *         values when there is more than one, or null when nothing remains
   *         or a numeric subscript is out of range
   * @throws JSONException declared for the JSON accessors used
   */
  private Object extract_json_withindex(Object json, ArrayList<String> indexList)
      throws JSONException {

    jsonList.clear();
    jsonList.add(json);

    for (String index : indexList) {
      ArrayList<Object> selected = new ArrayList<Object>();

      if (index.equalsIgnoreCase("*")) {
        for (Object candidate : jsonList) {
          if (!(candidate instanceof JSONArray)) {
            continue; // a wildcard only expands arrays
          }
          JSONArray array = (JSONArray) candidate;
          try {
            for (int j = 0; j < array.length(); j++) {
              selected.add(array.get(j));
            }
          } catch (JSONException e) {
            // Best effort: keep what was collected from this array and move on.
            continue;
          }
        }
      } else {
        int position = Integer.parseInt(index);
        for (Object candidate : jsonList) {
          if (!(candidate instanceof JSONArray)) {
            continue; // a numeric subscript only applies to arrays
          }
          try {
            selected.add(((JSONArray) candidate).get(position));
          } catch (JSONException e) {
            return null; // subscript out of range
          }
        }
      }

      // Swap only after every candidate was visited; doing it inside the loop
      // would stop the iteration after the first candidate.
      jsonList = selected;
      if (jsonList.isEmpty()) {
        return null;
      }
    }

    return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList.get(0);
  }

  /**
   * Looks up {@code path} as a key in {@code json}.
   *
   * <p>When {@code json} is an array, the key is looked up in every element
   * that is a JSON object containing it; array-valued results are flattened
   * one level into the returned array. Otherwise {@code json} is treated as a
   * JSON object.
   *
   * @param json a {@link JSONArray} or {@link JSONObject}
   * @param path the key to look up
   * @return the value, an array of values, or null when {@code json} is an
   *         array and no element provides the key
   * @throws JSONException when {@code json} is an object without that key
   */
  private Object extract_json_withkey(Object json, String path) throws JSONException {
    if (json instanceof JSONArray) {
      JSONArray source = (JSONArray) json;
      JSONArray collected = new JSONArray();
      for (int i = 0; i < source.length(); i++) {
        Object element = source.get(i);
        if (!(element instanceof JSONObject)) {
          continue; // only objects can carry the key
        }
        JSONObject object = (JSONObject) element;
        if (!object.has(path)) {
          continue;
        }
        Object value = object.get(path);
        if (value instanceof JSONArray) {
          JSONArray nested = (JSONArray) value;
          for (int j = 0; j < nested.length(); j++) {
            collected.put(nested.get(j));
          }
        } else {
          collected.put(value);
        }
      }
      return (collected.length() == 0) ? null : collected;
    } else {
      return ((JSONObject) json).get(path);
    }
  }

  /**
   * Ad-hoc demo: evaluates {@code $.ID} against a sample document and prints
   * the result. The sample's root is a JSON array, which
   * {@link #evaluate(String, String)} does not accept, so this prints null.
   */
  public static void main(String[] args) {
    String json = " {'t':'4';'idate':'20130128';'installday':'0';'activeinterval':'0';'activetotal':'0'}";
    json = "["
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":2,\"time\":2,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":36,\"time\":36,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":10,\"time\":10,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":22,\"time\":22,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":8,\"time\":8,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":35,\"time\":35,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":5,\"time\":5,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":18,\"time\":18,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":124,\"time\":124,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":21,\"time\":21,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":34,\"time\":34,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":1,\"time\":1,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"?\",\"counter\":7,\"time\":7,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":47,\"time\":47,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":3,\"time\":3,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":13,\"time\":13,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":16,\"time\":16,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":23,\"time\":23,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":135,\"time\":135,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":281,\"time\":281,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":358,\"time\":358,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":4,\"time\":4,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":33,\"time\":33,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":43,\"time\":43,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":9,\"time\":9,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":42,\"time\":42,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":55,\"time\":55,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":58,\"time\":58,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":70,\"time\":70,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":119,\"time\":119,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"?\",\"counter\":284,\"time\":284,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":363,\"time\":363,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":449,\"time\":449,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"?\",\"category\":\"\",\"counter\":32,\"time\":32,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":61,\"time\":61,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":226,\"time\":226,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":11,\"time\":11,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"?\",\"counter\":29,\"time\":29,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":44,\"time\":44,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":69,\"time\":69,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":71,\"time\":71,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":77,\"time\":77,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":161,\"time\":161,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"?\",\"counter\":162,\"time\":162,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":245,\"time\":245,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":20,\"time\":20,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"??\",\"counter\":45,\"time\":45,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":46,\"time\":46,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"?\",\"counter\":92,\"time\":92,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":134,\"time\":134,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"\",\"counter\":6,\"time\":6,\"type\":\"\",\"uid\":\"\"},"
        + "{\"ID\":\"\",\"area\":\"\",\"category\":\"?\",\"counter\":51,\"time\":51,\"type\":\"\",\"uid\":\"\"}"
        + "]";

    UDFJson udf = new UDFJson();
    System.out.println(udf.evaluate(json, "$.ID"));
  }
}