Java tutorial
/**
 * Copyright 2014 Yahoo! Inc. Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 * See accompanying LICENSE file.
 */
package com.yahoo.sql4d.sql4ddriver;

import com.yahoo.sql4d.query.RequestType;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.json.JSONArray;
import org.json.JSONObject;
import scala.Tuple2;

/**
 * Provides facilities to do chain of joins.
 *
 * Rows are kept in {@link #baseAllRows}, keyed by the value of the join field(s).
 * The first data set is loaded with {@code ActionType.FIRST_CUT}; every later data
 * set is merged into it with {@code JOIN}, {@code LEFT_JOIN} or {@code RIGHT_JOIN}.
 *
 * @author srikalyan
 */
public class Joiner4All extends BaseJoiner {

    /** Name of the timestamp column, both in the JSON payload and in the join field list. */
    private static final String TIMESTAMP = "timestamp";

    /** Separator placed between the parts of a composite join key. */
    private static final String KEY_SEPARATOR = "\u0001";

    /**
     * Each entry = &lt;key value, list of all values&gt;. Insertion ordered.
     * The first value of a freshly extracted row is always its timestamp.
     */
    public Map<Object, List<Object>> baseAllRows = new LinkedHashMap<>();

    public Joiner4All() {
    }

    /**
     * Creates a joiner whose base set is loaded from {@code jsonAllRows}.
     *
     * @param jsonAllRows rows of the first data set.
     * @param joinField field name(s) used to key the rows.
     */
    public Joiner4All(JSONArray jsonAllRows, List<String> joinField) {
        join(jsonAllRows, joinField, ActionType.FIRST_CUT);
    }

    /**
     * Extracts every row of {@code jsonAllRows}, keys it by the join field(s) and
     * applies {@code action} against {@link #baseAllRows}:
     * <ul>
     * <li>FIRST_CUT: the row is stored as is (no join).</li>
     * <li>JOIN: only keys present in both sets survive, base values followed by the new values.</li>
     * <li>LEFT_JOIN: every base row is kept, matching rows get the new values appended.</li>
     * <li>RIGHT_JOIN: like JOIN, plus the rows of the new set that had no match.</li>
     * </ul>
     * Headers are filled once, from the first row.
     *
     * NOTE(review): replacing the base map at the end assumes this method is invoked
     * once per join step - confirm against the caller in BaseJoiner.
     *
     * @param timestamp timestamp shared by all rows, or null when each item carries its own.
     * @param jsonAllRows rows of the data set to process.
     * @param joinFields field name(s) to join on (may include "timestamp").
     * @param requestType type of the query that produced the rows.
     * @param action what to do with the extracted rows.
     */
    @Override
    protected void extractAndTakeAction(String timestamp, JSONArray jsonAllRows, List<String> joinFields, RequestType requestType, ActionType action) {
        Map<Object, List<Object>> newBaseAllRows = new LinkedHashMap<>();
        for (int i = 0; i < jsonAllRows.length(); i++) {
            JSONObject jsonItem = jsonAllRows.getJSONObject(i);
            JSONObject eachRow = dataItemAt(jsonItem, requestType, action);
            String actualTimestamp = timestamp;
            if (timestamp == null) {
                if (requestType == RequestType.SELECT) {
                    actualTimestamp = eachRow.optString(TIMESTAMP);
                    // Because the timestamp is within each row remove it once it is extracted.
                    eachRow.remove(TIMESTAMP);
                } else {
                    actualTimestamp = jsonItem.optString(TIMESTAMP);
                }
            }
            Tuple2<Object, List<Object>> row = mapPkToRow(actualTimestamp, eachRow, joinFields);
            Object pk = row._1();// Primary key.
            List<Object> rowVal = row._2();
            if (action == ActionType.FIRST_CUT) {// First cut (no join).
                baseAllRows.put(pk, rowVal);
            } else if (baseAllRows.containsKey(pk)) {// An item of set B exists in set A.
                if (pk != null) {
                    // Remove the key value(s), they are already available in baseAllRows.
                    // A null key (row without any join column) has nothing to strip and
                    // used to fail here with a NullPointerException.
                    rowVal.removeAll(splitCompositeKeys(pk.toString()));
                }
                if (!joinFields.contains(TIMESTAMP)) {
                    // Timestamp is not a join field, so drop it from the to-be-joined values.
                    // The 1st value is always the timestamp (see mapPkToRow()).
                    rowVal.remove(0);
                }
                if (action == ActionType.JOIN || action == ActionType.RIGHT_JOIN) {
                    // Move the row from the existing map to the new map and extend it.
                    List<Object> joinedRow = baseAllRows.remove(pk);
                    newBaseAllRows.put(pk, joinedRow);
                    joinedRow.addAll(rowVal);
                } else if (action == ActionType.LEFT_JOIN) {
                    baseAllRows.get(pk).addAll(rowVal);
                }
            } else if (action == ActionType.RIGHT_JOIN) {
                // For right join the unmatched rows of set B are kept as well.
                newBaseAllRows.put(pk, rowVal);
            }
            if (i == 0) {// Fill headers (only once)
                fillHeaders(eachRow, joinFields, action);
            }
        }
        // The result of a join / right join is exactly what was collected above, even
        // when that is nothing at all. Deciding on the action (instead of on the
        // collected map being non-empty) stops a join without any match from silently
        // keeping the old base rows.
        if (action == ActionType.JOIN || action == ActionType.RIGHT_JOIN) {
            baseAllRows = newBaseAllRows;
        }
    }

    /**
     * Extract fields(k,v) from json
     * k = primary field(s), could be a composite key. A single join value is kept as
     *     the object found in the json; several values are concatenated into a String
     *     separated by \u0001.
     * v = all fields. The first field is always timestamp.
     * Presumption is jsonRow object passed to this method should not have timestamp field.
     *
     * @param timestamp timestamp of the row.
     * @param jsonRow a jsonObject without timestamp (for a select query response the
     *        timestamp is stripped off before the row is passed to this method).
     * @param joinFields field name(s) that make up the key.
     * @return tuple of (key, row values); the key is null when no join field was found.
     */
    private static Tuple2<Object, List<Object>> mapPkToRow(String timestamp, JSONObject jsonRow, List<String> joinFields) {
        Object joinValue = null;
        List<Object> rowValues = new ArrayList<>();
        rowValues.add(timestamp);
        for (Object key : jsonRow.keySet()) {
            String colName = key.toString();
            Object colValue = jsonRow.get(colName);
            rowValues.add(colValue);
            if (joinFields.contains(colName)) {
                joinValue = (joinValue == null) ? colValue : (joinValue + KEY_SEPARATOR + colValue);
            }
        }
        if (joinFields.contains(TIMESTAMP)) {
            // Join field could contain timestamp (timestamp can be out of the actual data
            // JSON, so it is handled separately).
            joinValue = (joinValue == null) ? timestamp : (joinValue + KEY_SEPARATOR + timestamp);
        }
        // Though join fields could be specified like (a,timestamp,b) the value of the join
        // key will be (a.value^Ab.value^Atimestamp.value) if b appears after a in the json
        // object. And the timestamp value is always appended last.
        return new Tuple2<>(joinValue, rowValues);
    }

    @Override
    public String toString() {
        return baseAllRows.toString();
    }
}