com.google.cloud.training.dataanalyst.sandiego.BigtableHelper.java Source code

Java tutorial

Introduction

Here is the source code for com.google.cloud.training.dataanalyst.sandiego.BigtableHelper.java

Source

/*
 * Copyright (C) 2017 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.training.dataanalyst.sandiego;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.io.gcp.bigtable.BigtableIO;
import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import com.google.bigtable.admin.v2.ColumnFamily;
import com.google.bigtable.admin.v2.CreateTableRequest;
import com.google.bigtable.admin.v2.GetTableRequest;
import com.google.bigtable.admin.v2.Table;
import com.google.bigtable.v2.Mutation;
import com.google.cloud.bigtable.config.BigtableOptions;
import com.google.cloud.bigtable.config.BigtableOptions.Builder;
import com.google.cloud.bigtable.config.CredentialOptions;
import com.google.cloud.bigtable.grpc.BigtableSession;
import com.google.cloud.bigtable.grpc.BigtableTableAdminClient;
import com.google.protobuf.ByteString;

/**
 * Helper class to stream a {@code PCollection<LaneInfo>} to Cloud Bigtable.
 *
 * <p>
 * Every {@code LaneInfo} element becomes one row in the
 * {@code current_conditions} table of the {@code sandiego} instance. All cells
 * are written to the single column family {@code lane}.
 *
 * @author vlakshmanan
 *
 */
public class BigtableHelper {

    private final static String INSTANCE_ID = "sandiego";
    private final static String TABLE_ID = "current_conditions";
    private final static String CF_FAMILY = "lane";

    /** Conversion factor from epoch milliseconds to the microseconds expected by SetCell. */
    private static final long MICROS_PER_MILLI = 1000L;

    /** Pattern of the LaneInfo timestamp once its 'T' separator has been replaced by a space. */
    private static final DateTimeFormatter TIMESTAMP_FORMAT = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Writes the given lane information to Bigtable, creating the destination
     * table first if looking it up fails.
     *
     * @param laneInfo
     *            the elements to persist
     * @param options
     *            pipeline options; the project id and the GCP credential are
     *            read from here
     */
    public static void writeToBigtable(PCollection<LaneInfo> laneInfo, DataflowPipelineOptions options) {
        BigtableOptions.Builder optionsBuilder = //
                new BigtableOptions.Builder()//
                        .setProjectId(options.getProject()) //
                        .setInstanceId(INSTANCE_ID).setUserAgent("cpb210");
        // Runs at pipeline-construction time, before the write transform is applied.
        createEmptyTable(options, optionsBuilder);
        PCollection<KV<ByteString, Iterable<Mutation>>> mutations = toMutations(laneInfo);
        mutations.apply("write:cbt", //
                BigtableIO.write().withBigtableOptions(optionsBuilder.build()).withTableId(TABLE_ID));
    }

    /**
     * Ensures the destination table exists: tries to fetch it and, if that
     * fails for any reason, attempts to create it with the single column family
     * {@code lane}.
     *
     * <p>
     * Note that the credential options are set on the caller's builder, not on a
     * copy.
     *
     * @param options
     *            pipeline options supplying the project id and GCP credential
     * @param optionsBuilder
     *            builder used to open the admin session
     * @throws RuntimeException
     *             wrapping the {@link IOException} if the session cannot be
     *             opened or closed; a failure of the create call itself is
     *             rethrown with the preceding lookup failure attached as a
     *             suppressed exception
     */
    private static void createEmptyTable(DataflowPipelineOptions options, Builder optionsBuilder) {
        Table.Builder tableBuilder = Table.newBuilder();
        ColumnFamily cf = ColumnFamily.newBuilder().build();
        tableBuilder.putColumnFamilies(CF_FAMILY, cf);

        try (BigtableSession session = new BigtableSession(optionsBuilder
                .setCredentialOptions(CredentialOptions.credential(options.as(GcpOptions.class).getGcpCredential()))
                .build())) {
            BigtableTableAdminClient tableAdminClient = session.getTableAdminClient();

            try {
                // if get fails, then create
                String tableName = getTableName(options);
                GetTableRequest getTableRequest = GetTableRequest.newBuilder().setName(tableName).build();
                tableAdminClient.getTable(getTableRequest);
            } catch (Exception lookupFailure) {
                // Deliberately broad: any lookup failure is treated as "table is missing".
                CreateTableRequest createTableRequest = //
                        CreateTableRequest.newBuilder().setParent(getInstanceName(options)) //
                                .setTableId(TABLE_ID).setTable(tableBuilder.build()).build();
                try {
                    tableAdminClient.createTable(createTableRequest);
                } catch (RuntimeException createFailure) {
                    // Keep the reason the lookup failed; it is often the real root cause.
                    createFailure.addSuppressed(lookupFailure);
                    throw createFailure;
                }
            }

        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Converts each {@code LaneInfo} into a row key plus the list of cell
     * mutations for that row.
     *
     * @param laneInfo
     *            the elements to convert
     * @return one {@code KV} of row key and mutations per input element
     */
    @SuppressWarnings("serial") // the anonymous DoFn below declares no serialVersionUID
    private static PCollection<KV<ByteString, Iterable<Mutation>>> toMutations(PCollection<LaneInfo> laneInfo) {
        return laneInfo.apply("pred->mutation", ParDo.of(new DoFn<LaneInfo, KV<ByteString, Iterable<Mutation>>>() {
            @ProcessElement
            public void processElement(ProcessContext c) throws Exception {
                LaneInfo info = c.element();
                // The formatter expects a space between date and time, so swap out the 'T'.
                DateTime ts = TIMESTAMP_FORMAT.parseDateTime(info.getTimestamp().replace('T', ' '));
                long tsMillis = ts.getMillis();

                // key is HIGHWAY#DIR#LANE#REVTS
                String key = info.getHighway() //
                        + "#" + info.getDirection() //
                        + "#" + info.getLane() //
                        + "#" + (Long.MAX_VALUE - tsMillis); // reverse time stamp

                // all the data is in a wide column table with only one column family
                List<Mutation> mutations = new ArrayList<>();
                addCell(mutations, "timestamp", info.getTimestamp(), tsMillis);
                addCell(mutations, "latitude", info.getLatitude(), tsMillis);
                addCell(mutations, "longitude", info.getLongitude(), tsMillis);
                addCell(mutations, "highway", info.getHighway(), tsMillis);
                addCell(mutations, "direction", info.getDirection(), tsMillis);
                addCell(mutations, "lane", info.getLane(), tsMillis);
                addCell(mutations, "speed", info.getSpeed(), tsMillis);
                addCell(mutations, "sensorId", info.getSensorKey(), tsMillis);
                c.output(KV.of(ByteString.copyFromUtf8(key), mutations));
            }

        }));
    }

    /**
     * Adds a cell holding the decimal string form of a numeric value.
     *
     * @param mutations
     *            list the new mutation is appended to
     * @param cellName
     *            column qualifier
     * @param cellValue
     *            value, stored as {@link Double#toString(double)}
     * @param ts
     *            cell timestamp in milliseconds since the epoch
     */
    private static void addCell(List<Mutation> mutations, String cellName, double cellValue, long ts) {
        addCell(mutations, cellName, Double.toString(cellValue), ts);
    }

    /**
     * Appends a SetCell mutation for the given column to {@code mutations}.
     * Null or empty values are skipped, so no cell is written for them.
     *
     * @param mutations
     *            list the new mutation is appended to
     * @param cellName
     *            column qualifier
     * @param cellValue
     *            value to store, UTF-8 encoded; may be null or empty
     * @param ts
     *            cell timestamp in milliseconds since the epoch
     */
    private static void addCell(List<Mutation> mutations, String cellName, String cellValue, long ts) {
        if (cellValue == null || cellValue.isEmpty()) {
            return;
        }
        ByteString value = ByteString.copyFromUtf8(cellValue);
        ByteString colname = ByteString.copyFromUtf8(cellName);
        Mutation m = //
                Mutation.newBuilder().setSetCell(//
                        Mutation.SetCell.newBuilder() //
                                .setValue(value)//
                                .setFamilyName(CF_FAMILY)//
                                .setColumnQualifier(colname)//
                                // callers pass milliseconds; SetCell takes microseconds
                                .setTimestampMicros(ts * MICROS_PER_MILLI) //
                ).build();
        mutations.add(m);
    }

    /**
     * @param options
     *            pipeline options supplying the project id
     * @return the instance resource name, {@code projects/<project>/instances/sandiego}
     */
    private static String getInstanceName(DataflowPipelineOptions options) {
        return String.format("projects/%s/instances/%s", options.getProject(), INSTANCE_ID);
    }

    /**
     * @param options
     *            pipeline options supplying the project id
     * @return the table resource name, i.e. the instance name followed by
     *         {@code /tables/current_conditions}
     */
    private static String getTableName(DataflowPipelineOptions options) {
        return String.format("%s/tables/%s", getInstanceName(options), TABLE_ID);
    }
}