com.threewks.thundr.bigquery.BigQueryPushServiceImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.threewks.thundr.bigquery.BigQueryPushServiceImpl.java

Source

/*
 * This file is a component of thundr, a software library from 3wks.
 * Read more: http://www.3wks.com.au/thundr
 * Copyright (C) 2013 3wks, <thundr@3wks.com.au>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.threewks.thundr.bigquery;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;

import jodd.util.StringUtil;

import com.google.api.client.http.AbstractInputStreamContent;
import com.google.api.client.http.ByteArrayContent;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationLoad;
import com.google.api.services.bigquery.model.TableReference;
import com.google.appengine.api.taskqueue.Queue;
import com.google.appengine.api.taskqueue.TaskHandle;
import com.google.appengine.api.taskqueue.TaskOptions;
import com.google.appengine.api.taskqueue.TaskOptions.Method;
import com.google.common.base.Function;
import com.google.common.collect.Maps;
import com.threewks.thundr.exception.BaseException;
import com.threewks.thundr.logger.Logger;

/**
 * {@link BigQueryPushService} implementation that buffers events as CSV rows on a task queue
 * (tasks are added with {@link Method#PULL}) and later leases them in batches, submitting one
 * BigQuery load job per destination table.
 */
public class BigQueryPushServiceImpl implements BigQueryPushService {

    /** Content type sent with the uploaded CSV payload. */
    private static final String CSV_CONTENT_TYPE = "application/octet-stream";
    /** Charset used to encode the CSV payload; UTF-8 is the default encoding of BigQuery load jobs. */
    private static final String CSV_CHARSET = "UTF-8";

    /** Task parameter holding the destination table id. */
    private static final String TABLE_ID_PARAM = "tableId";
    /** Task parameter holding the CSV row(s) to load. */
    private static final String DATA_PARAM = "data";

    /** How long leased tasks stay invisible to other consumers, in minutes. */
    private static final long LEASE_DURATION_MINUTES = 10;
    /** Maximum number of tasks leased by a single {@link #processTasks()} call. */
    private static final long MAX_TASKS_PER_LEASE = 1000;

    private final Queue bigQueryQueue;
    private final String projectId;
    private final String datasetId;
    private final Bigquery bigQuery;
    private final Map<Class<?>, EventConverter<?>> converters = new HashMap<Class<?>, EventConverter<?>>();

    /**
     * @param bigQuery the client used to submit load jobs.
     * @param bigQueryQueue the queue events are buffered on until {@link #processTasks()} runs.
     * @param bigQueryProjectId the BigQuery project that owns the dataset.
     * @param bigQueryDatasetId the BigQuery dataset containing the destination tables.
     */
    public BigQueryPushServiceImpl(Bigquery bigQuery, Queue bigQueryQueue, String bigQueryProjectId,
            String bigQueryDatasetId) {
        this.bigQuery = bigQuery;
        this.bigQueryQueue = bigQueryQueue;
        this.projectId = bigQueryProjectId;
        this.datasetId = bigQueryDatasetId;
    }

    /** Loads the given CSV data into a table of the configured project and dataset. */
    private void updateTable(String tableId, String csvData) throws IOException {
        executeLoad(bigQuery, projectId, datasetId, tableId, csvData);
    }

    /**
     * Registers a converter under the class returned by its {@code getEventClass()}, replacing
     * any converter previously registered for that class.
     */
    @Override
    public <T> void registerEventConverter(EventConverter<T> converter) {
        converters.put(converter.getEventClass(), converter);
    }

    /**
     * Converts the event to CSV with the converter registered for its exact runtime class and
     * queues it for a later load. Converters registered for a superclass are not considered.
     *
     * @throws BaseException if no converter is registered for the event's class.
     * @throws NullPointerException if {@code event} is null.
     */
    @SuppressWarnings("unchecked")
    @Override
    public <T> void trackEvent(T event) {
        // Unchecked cast: converters are keyed by their own getEventClass(), which is presumably
        // the T they convert - verify against EventConverter.
        EventConverter<T> converter = (EventConverter<T>) converters.get(event.getClass());
        if (converter == null) {
            throw new BaseException("No converter registered for class %s", event.getClass());
        }
        queueEvent(converter.getTableId(), converter.getCsvData(event));
    }

    /** Adds a pull task carrying the table id and CSV data to the queue. */
    private void queueEvent(String tableId, String csvData) {
        TaskOptions task = TaskOptions.Builder.withMethod(Method.PULL);
        task.param(TABLE_ID_PARAM, tableId);
        task.param(DATA_PARAM, csvData);
        bigQueryQueue.add(task);
    }

    /**
     * Leases up to {@value #MAX_TASKS_PER_LEASE} queued tasks, submits one load job per destination
     * table and deletes each table's tasks as soon as its load has been submitted.
     * <p>
     * Deleting per table means that a failure on one table leaves only the tasks of the tables not
     * yet loaded on the queue; tables already loaded are not loaded a second time when the lease
     * expires and the remaining tasks are retried.
     * <p>
     * NOTE(review): a task is deleted once the insert call returns; whether the load job has
     * actually completed successfully at that point is not checked here - confirm this is acceptable.
     *
     * @return the number of tasks leased and handled; 0 if the queue had nothing to lease.
     * @throws IOException if submitting a load job fails. Tasks of tables not yet loaded stay leased.
     */
    @Override
    public int processTasks() throws IOException {
        Logger.debug("Processing tasks from queue %s\n", bigQueryQueue.getQueueName());

        List<TaskHandle> tasks = bigQueryQueue.leaseTasks(LEASE_DURATION_MINUTES, TimeUnit.MINUTES,
                MAX_TASKS_PER_LEASE);
        if (tasks.isEmpty()) {
            return 0;
        }

        List<TaskHandle> unloadable = new ArrayList<TaskHandle>();
        Map<String, TableBatch> batches = groupByTable(tasks, unloadable);

        // These tasks carry nothing that can be loaded, so retrying them would never help.
        if (!unloadable.isEmpty()) {
            bigQueryQueue.deleteTask(unloadable);
        }

        for (Entry<String, TableBatch> entry : batches.entrySet()) {
            TableBatch batch = entry.getValue();
            updateTable(entry.getKey(), batch.csv.toString());
            // Delete straight away so a failure on a later table cannot cause this one to be re-loaded.
            bigQueryQueue.deleteTask(batch.tasks);
        }
        return tasks.size();
    }

    /**
     * BigQuery tasks are added to the queue with the parameters 'tableId' and 'data'.
     * tableId is the id of the table and data is a csv representation of the data to
     * be added to the table. This groups the tasks by table, in first-seen order,
     * concatenating their data one task per line.
     *
     * @param tasks the tasks to extract the data from.
     * @param unloadable receives every task that has no data or no table id.
     * @return a map of table ids to the csv data and tasks destined for that table.
     */
    private Map<String, TableBatch> groupByTable(Collection<TaskHandle> tasks, List<TaskHandle> unloadable) {
        Map<String, TableBatch> batches = new LinkedHashMap<String, TableBatch>();

        for (TaskHandle task : tasks) {
            String tableId = getParam(task, TABLE_ID_PARAM);
            String data = getParam(task, DATA_PARAM);

            if (StringUtil.isBlank(data)) {
                // Nothing to load for this task.
                unloadable.add(task);
                continue;
            }
            if (StringUtil.isBlank(tableId)) {
                // Without a table the row can never be loaded; submitting it would fail the whole
                // batch on every run and block the queue.
                Logger.warn("Discarding task %s: it has data but no tableId", task.getName());
                unloadable.add(task);
                continue;
            }

            TableBatch batch = batches.get(tableId);
            if (batch == null) {
                batch = new TableBatch();
                batches.put(tableId, batch);
            }
            batch.csv.append(data).append('\n');
            batch.tasks.add(task);
        }
        return batches;
    }

    /**
     * Execute a 'load' API call to append data into an existing BigQuery table.
     * 
     * @param bigQuery the {@link Bigquery} object to access the BigQuery API.
     * @param projectId the BigQuery project id.
     * @param datasetId the BigQuery dataset id.
     * @param tableId the BigQuery table id.
     * @param csvData the CSV data to load into the table.
     * @throws IOException if something goes wrong running the job.
     */
    private void executeLoad(Bigquery bigQuery, String projectId, String datasetId, String tableId, String csvData)
            throws IOException {
        TableReference tableReference = new TableReference().setProjectId(projectId).setDatasetId(datasetId)
                .setTableId(tableId);
        JobConfigurationLoad loadConfig = new JobConfigurationLoad().setDestinationTable(tableReference)
                .setAllowQuotedNewlines(true);
        JobConfiguration config = new JobConfiguration().setLoad(loadConfig);
        Job job = new Job().setConfiguration(config);
        // Encode explicitly: the platform default charset may not be UTF-8, which would corrupt
        // non-ASCII data. UnsupportedEncodingException is an IOException, so it propagates as declared.
        AbstractInputStreamContent content = new ByteArrayContent(CSV_CONTENT_TYPE, csvData.getBytes(CSV_CHARSET));
        bigQuery.jobs().insert(projectId, job, content).execute();
    }

    /**
     * Get a parameter from a task.
     * 
     * @param task the task to get the parameter from.
     * @param name the name of the parameter to get.
     * @return the value of the parameter or null if no matching parameter could be found
     *         or the task's parameters could not be extracted.
     */
    private String getParam(TaskHandle task, String name) {
        try {
            List<Entry<String, String>> params = task.extractParams();
            for (Entry<String, String> param : params) {
                if (param.getKey().equals(name)) {
                    return param.getValue();
                }
            }
        } catch (Exception e) {
            // Best effort: a task whose payload cannot be read is treated as not having the
            // parameter, but the failure is logged so it is not lost entirely.
            Logger.warn("Unable to extract parameter '%s' from task %s: %s", name, task.getName(), e);
        }
        return null;
    }

    /** The CSV rows destined for one table together with the tasks that supplied them. */
    private static final class TableBatch {
        private final StringBuilder csv = new StringBuilder();
        private final List<TaskHandle> tasks = new ArrayList<TaskHandle>();
    }

}