com.yahoo.sql4d.sql4ddriver.Joiner4All.java Source code

Java tutorial

Introduction

Here is the source code for com.yahoo.sql4d.sql4ddriver.Joiner4All.java

Source

/**
 * Copyright 2014 Yahoo! Inc. Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the 
 * specific language governing permissions and limitations under the License. 
 * See accompanying LICENSE file.
 */
package com.yahoo.sql4d.sql4ddriver;

import com.yahoo.sql4d.query.RequestType;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.json.JSONArray;
import org.json.JSONObject;
import scala.Tuple2;

/**
 * Provides facilities to do chain of joins.
 *
 * @author srikalyan
 */
public class Joiner4All extends BaseJoiner {

    public Map<Object, List<Object>> baseAllRows = new LinkedHashMap<>();// Each entry = <key value, list of all values>

    public Joiner4All() {
    }

    public Joiner4All(JSONArray jsonAllRows, List<String> joinField) {
        join(jsonAllRows, joinField, ActionType.FIRST_CUT);
    }

    /**
     * Generate a Tuple <A, B>
     * A = list of field names
     * B = map of <joinFeild, rowList> from jsonArray.
     * @param timestamp
     * @param jsonAllRows
     * @param joinFields 
     * @param requestType 
     * @param action 
     */
    @Override
    protected void extractAndTakeAction(String timestamp, JSONArray jsonAllRows, List<String> joinFields,
            RequestType requestType, ActionType action) {
        Map<Object, List<Object>> newBaseAllRows = new LinkedHashMap<>();
        JSONObject eachRow = null;
        for (int i = 0; i < jsonAllRows.length(); i++) {
            JSONObject jsonItem = jsonAllRows.getJSONObject(i);
            eachRow = dataItemAt(jsonItem, requestType, action);
            String actualTimestamp = timestamp;
            if (timestamp == null) {
                if (requestType == RequestType.SELECT) {
                    actualTimestamp = eachRow.optString("timestamp");
                    // Because the timestamp is within each row remove them once you extract it.
                    eachRow.remove("timestamp");
                } else {
                    actualTimestamp = jsonItem.optString("timestamp");
                }
            }
            Tuple2<Object, List<Object>> row = mapPkToRow(actualTimestamp, eachRow, joinFields);
            Object pk = row._1();// Primary key.
            List<Object> rowVal = row._2();
            if (action == ActionType.FIRST_CUT) {// First cut (no join).
                baseAllRows.put(pk, rowVal);
            } else {// Some Join action.
                if (baseAllRows.containsKey(pk)) {// If any item in set B exist in set A.
                    rowVal.removeAll(splitCompositeKeys(pk.toString()));// Remove key(which is already available in baseAllRows)
                    if (!joinFields.contains("timestamp")) {// If join field is not "timestamp" then remove timestamp data value from to be joined set.
                        rowVal.remove(0);// Because the 1st field is always timestamp.(See extractKeyAndRow() method)
                    }
                    if (action == ActionType.JOIN || action == ActionType.RIGHT_JOIN) {
                        newBaseAllRows.put(pk, baseAllRows.remove(pk));// Remove from existing map and add to new map.
                        newBaseAllRows.get(pk).addAll(rowVal);// Update the new map with partial to be joined data.
                    } else if (action == ActionType.LEFT_JOIN) {// Left join
                        baseAllRows.get(pk).addAll(rowVal);
                    }
                } else {// For right join we still need to continue with each row in set B.
                    if (action == ActionType.RIGHT_JOIN) {// Right join
                        newBaseAllRows.put(pk, rowVal);
                    }
                }
            }
            if (i == 0) {// Fill headers (only once)
                fillHeaders(eachRow, joinFields, action);
            }
        }
        if (!newBaseAllRows.isEmpty()) {// For join and right join.
            baseAllRows = newBaseAllRows;
        }
    }

    /**
     * Extract fields(k,v) from json 
     * k = primary field(s) could be a composite key.
     * v = all fields . The first field is always timestamp.
     * Presumption is jsonRow object passed to this method should not have timestamp field.
     * @param timestamp
     * @param jsonRow (This is a jsonObject without timestamp(even for select query response though timestamp is present it is stripped off before passing it to this method)
     * @param joinFields
     * @return 
     */
    private static Tuple2<Object, List<Object>> mapPkToRow(String timestamp, JSONObject jsonRow,
            List<String> joinFields) {
        Object joinValue = null;
        List<Object> rowValues = new ArrayList<>();
        rowValues.add(timestamp);
        for (Object key : jsonRow.keySet()) {
            String colName = key.toString();
            rowValues.add(jsonRow.get(colName));
            if (joinFields.contains(colName)) {
                joinValue = (joinValue == null) ? jsonRow.get(colName)
                        : (joinValue + "\u0001" + jsonRow.get(colName));
            }
        }
        if (joinFields.contains("timestamp")) {// Join field could contain timestamp(timestamp can be out of the actual data JSON, so we try this way)
            joinValue = (joinValue == null) ? timestamp : (joinValue + "\u0001" + timestamp);
        }
        //Though join field could be specified like (a,timestamp,b) the value of the join,
        //field will be (a.value^Ab.value^Atimestamp.value) if b appears after a in the json 
        //object. And timestamp value is always appended to last.
        return new Tuple2<>(joinValue, rowValues);
    }

    @Override
    public String toString() {
        return baseAllRows.toString();
    }

}