Java tutorial
/* * This file is a component of thundr, a software library from 3wks. * Read more: http://www.3wks.com.au/thundr * Copyright (C) 2013 3wks, <thundr@3wks.com.au> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.threewks.thundr.bigquery; import java.io.IOException; import java.util.Collection; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.TimeUnit; import jodd.util.StringUtil; import com.google.api.client.http.AbstractInputStreamContent; import com.google.api.client.http.ByteArrayContent; import com.google.api.services.bigquery.Bigquery; import com.google.api.services.bigquery.model.Job; import com.google.api.services.bigquery.model.JobConfiguration; import com.google.api.services.bigquery.model.JobConfigurationLoad; import com.google.api.services.bigquery.model.TableReference; import com.google.appengine.api.taskqueue.Queue; import com.google.appengine.api.taskqueue.TaskHandle; import com.google.appengine.api.taskqueue.TaskOptions; import com.google.appengine.api.taskqueue.TaskOptions.Method; import com.google.common.base.Function; import com.google.common.collect.Maps; import com.threewks.thundr.exception.BaseException; import com.threewks.thundr.logger.Logger; public class BigQueryPushServiceImpl implements BigQueryPushService { private static final String CSV_CONTENT_TYPE = "application/octet-stream"; private Queue bigQueryQueue; private String projectId; private String datasetId; private Bigquery bigQuery; private Map<Class<?>, EventConverter<?>> converters = new HashMap<Class<?>, EventConverter<?>>(); public BigQueryPushServiceImpl(Bigquery bigQuery, Queue bigQueryQueue, String bigQueryProjectId, String bigQueryDatasetId) { this.bigQuery = bigQuery; this.bigQueryQueue = bigQueryQueue; this.projectId = bigQueryProjectId; this.datasetId = bigQueryDatasetId; } private void updateTable(String tableId, String csvData) throws IOException { executeLoad(bigQuery, projectId, datasetId, tableId, csvData); } @Override public <T> void registerEventConverter(EventConverter<T> converter) { converters.put(converter.getEventClass(), converter); } @SuppressWarnings("unchecked") @Override public <T> void trackEvent(T event) { EventConverter<T> converter = (EventConverter<T>) converters.get(event.getClass()); if (converter == null) { throw new BaseException("No converter registered for class %s", event.getClass()); } queueEvent(converter.getTableId(), converter.getCsvData(event)); } private void queueEvent(String tableId, String csvData) { TaskOptions task = TaskOptions.Builder.withMethod(Method.PULL); task.param("tableId", tableId); task.param("data", csvData); bigQueryQueue.add(task); } @Override public int processTasks() throws IOException { Logger.debug("Processing tasks from queue %s\n", bigQueryQueue.getQueueName()); int count = 0; List<TaskHandle> tasks = bigQueryQueue.leaseTasks(10, TimeUnit.MINUTES, 1000); if (!tasks.isEmpty()) { Map<String, String> reportData = buildReportData(tasks); for (Map.Entry<String, String> entry : reportData.entrySet()) { updateTable(entry.getKey(), entry.getValue()); } count += tasks.size(); bigQueryQueue.deleteTask(tasks); } return count; } /** * BigQuery tasks are added to the queue with the parameters 'tableId' and 'data'. * tableId is the id of the table and data is a csv representation of the data to * be added to the table. * * @param a collection of tasks to extract the data from. * @return a map of table ids to csv data. */ private Map<String, String> buildReportData(Collection<TaskHandle> tasks) { Map<String, StringBuilder> reportData = new LinkedHashMap<String, StringBuilder>(); for (TaskHandle task : tasks) { String tableId = getParam(task, "tableId"); String data = getParam(task, "data"); if (StringUtil.isNotBlank(data)) { if (!reportData.containsKey(tableId)) { StringBuilder csv = new StringBuilder(); reportData.put(tableId, csv); } reportData.get(tableId).append(String.format("%s\n", data)); } } // convert the CSV StringBuilders to strings before returning return new LinkedHashMap<String, String>( Maps.transformValues(reportData, new Function<StringBuilder, String>() { public String apply(StringBuilder s) { return s.toString(); } })); } /** * Execute a 'load' API call to append data into an existing BigQuery table. * * @param bigQuery the {@link Bigquery} object to access the BigQuery API. * @param projectId the BigQuery project id. * @param datasetId the BigQuery dataset id. * @param tableId the BigQuery table id. * @param csvData the CSV data to load into the table. * @throws IOException if something goes wrong running the job. */ private void executeLoad(Bigquery bigQuery, String projectId, String datasetId, String tableId, String csvData) throws IOException { TableReference tableReference = new TableReference().setProjectId(projectId).setDatasetId(datasetId) .setTableId(tableId); JobConfigurationLoad loadConfig = new JobConfigurationLoad().setDestinationTable(tableReference) .setAllowQuotedNewlines(true); JobConfiguration config = new JobConfiguration().setLoad(loadConfig); Job job = new Job().setConfiguration(config); AbstractInputStreamContent content = new ByteArrayContent(CSV_CONTENT_TYPE, csvData.getBytes()); bigQuery.jobs().insert(projectId, job, content).execute(); } /** * Get a parameter from a task. * * @param task the task to get the parameter from. * @param name the name of the parameter to get. * @return the value of the parameter or null if no matching parameter could be found. */ private String getParam(TaskHandle task, String name) { try { List<Entry<String, String>> params = task.extractParams(); for (Entry<String, String> param : params) { if (param.getKey().equals(name)) { return param.getValue(); } } } catch (Exception e) { // ignore exceptions, just return null } return null; } }