Java tutorial
/* * Copyright (c) 1995-2014, The University of Sheffield. See the file * COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt * * This file is part of GATE (see http://gate.ac.uk/), and is free * software, licenced under the GNU Library General Public License, * Version 2, June 1991 (in the distribution as file licence.html, * and also available at http://gate.ac.uk/gate/licence.html). * * $Id: TweetUtils.java 17719 2014-03-20 20:41:29Z adamfunk $ */ package gate.corpora.twitter; import gate.Factory; import gate.FeatureMap; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.commons.lang.StringUtils; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; /* REFERENCES * Jackson API * http://wiki.fasterxml.com/JacksonHome * Standard: RFC 4627 * https://tools.ietf.org/html/rfc4627 * */ public class TweetUtils { public static final String PATH_SEPARATOR = ":"; public static final String MIME_TYPE = "text/x-json-twitter"; public static final String DEFAULT_ENCODING = "UTF-8"; public static final String TWEET_ANNOTATION_TYPE = "Tweet"; public static final String DEFAULT_TEXT_ATTRIBUTE = "text"; public static final String[] DEFAULT_CONTENT_KEYS = { DEFAULT_TEXT_ATTRIBUTE, "created_at", "user:name" }; public static final String[] DEFAULT_FEATURE_KEYS = { "user:screen_name", "user:location", "id", "source", "truncated", "retweeted_status:id" }; public static List<Tweet> readTweets(String string) throws IOException { if (string.startsWith("[")) { return readTweetList(string, null, null); } // implied else return readTweetLines(string, null, null); } public static List<Tweet> readTweets(String string, List<String> contentKeys, List<String> featureKeys) throws IOException { if (string.startsWith("[")) { return readTweetList(string, contentKeys, featureKeys); } // implied else return readTweetLines(string, contentKeys, featureKeys); } public static List<Tweet> readTweetLines(String string, List<String> contentKeys, List<String> featureKeys) throws IOException { String[] lines = string.split("[\\n\\r]+"); return readTweetStrings(lines, contentKeys, featureKeys); } public static List<Tweet> readTweetStrings(String[] lines, List<String> contentKeys, List<String> featureKeys) throws IOException { ObjectMapper mapper = new ObjectMapper(); List<Tweet> tweets = new ArrayList<Tweet>(); for (String line : lines) { if (line.length() > 0) { JsonNode jnode = mapper.readTree(line); tweets.add(Tweet.readTweet(jnode, contentKeys, featureKeys)); } } return tweets; } public static List<Tweet> readTweetStrings(List<String> lines, List<String> contentKeys, List<String> featureKeys) throws IOException { ObjectMapper mapper = new ObjectMapper(); List<Tweet> tweets = new ArrayList<Tweet>(); for (String line : lines) { if (line.length() > 0) { JsonNode jnode = mapper.readTree(line); tweets.add(Tweet.readTweet(jnode, contentKeys, featureKeys)); } } return tweets; } public static List<Tweet> readTweetList(String string, List<String> contentKeys, List<String> featureKeys) throws IOException { ObjectMapper mapper = new ObjectMapper(); List<Tweet> tweets = new ArrayList<Tweet>(); ArrayNode jarray = (ArrayNode) mapper.readTree(string); for (JsonNode jnode : jarray) { tweets.add(Tweet.readTweet(jnode, contentKeys, featureKeys)); } return tweets; } public static Object process(JsonNode node) { /* JSON types: number, string, boolean, array, object (dict/map), * null. All map keys are strings. */ if (node.isBoolean()) { return node.asBoolean(); } if (node.isDouble()) { return node.asDouble(); } if (node.isInt()) { return node.asInt(); } if (node.isTextual()) { return node.asText(); } if (node.isNull()) { return null; } if (node.isArray()) { List<Object> list = new ArrayList<Object>(); for (JsonNode item : node) { list.add(process(item)); } return list; } if (node.isObject()) { FeatureMap map = Factory.newFeatureMap(); Iterator<String> keys = node.fieldNames(); while (keys.hasNext()) { String key = keys.next(); map.put(key, process(node.get(key))); } return map; } return node.toString(); } public static FeatureMap process(JsonNode node, List<String> keepers) { FeatureMap found = Factory.newFeatureMap(); for (String keeper : keepers) { String[] keySequence = StringUtils.split(keeper, PATH_SEPARATOR); Object value = dig(node, keySequence, 0); if (value != null) { found.put(keeper, value); } } return found; } /** * Dig through a JSON object, key-by-key (recursively). * @param node * @param keySequence * @return the value held by the last key in the sequence; this will * be a FeatureMap if there is further nesting */ public static Object dig(JsonNode node, String[] keySequence, int index) { if ((index >= keySequence.length) || (node == null)) { return null; } if (node.has(keySequence[index])) { JsonNode value = node.get(keySequence[index]); if (keySequence.length == (index + 1)) { // Found last key in sequence; convert the JsonNode // value to a normal object (possibly FeatureMap) return process(value); } else if (value != null) { // Found current key; keep digging for the rest return dig(value, keySequence, index + 1); } } return null; } }