com.google.cloud.bigtable.dataflowimport.testing.SequenceFileIoUtils.java Source code

Java tutorial

Introduction

Here is the source code for com.google.cloud.bigtable.dataflowimport.testing.SequenceFileIoUtils.java

Source

/*
 * Copyright 2015 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.bigtable.dataflowimport.testing;

import com.google.cloud.bigtable.dataflowimport.HBaseImportIO;
import com.google.cloud.bigtable.dataflowimport.HBaseImportOptions;
import com.google.cloud.dataflow.sdk.io.BoundedSource;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.testing.SourceTestUtils;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.serializer.WritableSerialization;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Set;

/**
 * Helper methods for creating and reading HBase Sequence Files in tests.
 */
public class SequenceFileIoUtils {

    /**
     * Creates a Sequence File and writes the content of {@code keyValues} to it.
     */
    public static void createFileWithData(File sequenceFile, Collection<? extends Cell> keyValues)
            throws IOException {
        Configuration configuration = new Configuration();
        // Configure serializers for Sequence File writer.
        configuration.set("io.serializations",
                WritableSerialization.class.getName() + "," + ResultSerialization.class.getName());
        // Group cells by rowKey.
        ImmutableListMultimap<byte[], ? extends Cell> records = Multimaps.index(keyValues,
                new Function<Cell, byte[]>() {
                    @Override
                    public byte[] apply(Cell cell) {
                        return CellUtil.cloneRow(cell);
                    }
                });
        try (Writer writer = SequenceFile.createWriter(configuration, Writer.keyClass(ImmutableBytesWritable.class),
                Writer.valueClass(Result.class), Writer.file(new Path(sequenceFile.toURI())))) {

            for (Collection<? extends Cell> cells : records.asMap().values()) {
                Result row = Result.create(cells.toArray(new Cell[cells.size()]));
                writer.append(new ImmutableBytesWritable(row.getRow()), row);
            }
            writer.close();
        }
    }

    /**
     * Returns a {@link Set} of {@link Cell}s from the content of a Sequence File.
     */
    public static Set<? extends Cell> readCellsFromSource(HBaseImportOptions importOptions) throws IOException {
        BoundedSource<KV<ImmutableBytesWritable, Result>> source = HBaseImportIO.createSource(importOptions);
        Iterable<List<Cell>> cellsByRow = Iterables.transform(
                SourceTestUtils.readFromSource(source, PipelineOptionsFactory.create()),
                new Function<KV<ImmutableBytesWritable, Result>, List<Cell>>() {
                    @Override
                    public List<Cell> apply(KV<ImmutableBytesWritable, Result> kv) {
                        return kv.getValue().listCells();
                    }
                });
        Set<Cell> cells = Sets.newHashSet();
        for (List<Cell> rowCells : cellsByRow) {
            cells.addAll(rowCells);
        }
        return cells;
    }
}