hydrograph.engine.hive.scheme.ParquetHiveTest.java Source code

Java tutorial

Introduction

Here is the source code for hydrograph.engine.hive.scheme.ParquetHiveTest.java

Source

/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 *******************************************************************************/
package hydrograph.engine.hive.scheme;

import cascading.flow.FlowDef;
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.TextDelimited;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tap.hive.HiveTableDescriptor;
import cascading.tap.hive.HiveTap;
import cascading.tuple.Fields;
import cascading.tuple.type.DateType;
import hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetScheme;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.Properties;

/**
 * Manual driver that wires a Cascading flow from a comma-delimited text source
 * ({@code data/output_testalltype}) into the Hive table {@code testing23} using
 * {@link HiveParquetScheme}.
 *
 * <p>This class has a {@code main} method rather than unit-test methods; it is meant to be
 * launched directly with Hadoop generic options on the command line.
 */
public class ParquetHiveTest {

    /**
     * Builds and runs the flow.
     *
     * @param args command-line arguments; they are handed to {@link GenericOptionsParser},
     *             and whatever it leaves over is only echoed to standard output
     * @throws IOException declared for {@link GenericOptionsParser} construction
     */
    public static void main(String[] args) throws IOException {

        // The parser receives the Configuration along with the raw arguments and
        // returns the arguments it did not consume.
        Configuration hadoopConf = new Configuration();
        String[] remainingArgs = new GenericOptionsParser(hadoopConf, args).getRemainingArgs();

        // Echo the leftover arguments, each one preceded by a single space.
        StringBuilder echoed = new StringBuilder();
        for (String remaining : remainingArgs) {
            echoed.append(" ").append(remaining);
        }
        System.out.println("After processing arguments are:" + echoed);

        // Copy every configuration entry (the regex matches all keys) into the
        // properties given to the flow connector.
        Properties flowProperties = new Properties();
        flowProperties.putAll(hadoopConf.getValByRegex(".*"));

        // Source side: six typed fields read from a comma-delimited file.
        DateType dateType = new DateType("yyyy-MM-dd");
        Fields inputFields = new Fields("a", "b", "c", "d", "e", "f").applyTypes(String.class, Integer.class,
                Long.class, BigDecimal.class, dateType, String.class);
        // NOTE(review): the 'true' flag is presumably "file has a header row" - confirm against TextDelimited.
        Tap source = new Hfs(new TextDelimited(inputFields, true, ","), "data/output_testalltype");

        // Sink side: the Hive table keeps columns a-e and adds an array<int> column named "new"
        // in place of the input field "f".
        String[] columnNames = { "a", "b", "c", "d", "e", "new" };
        String[] columnTypes = { "string", "int", "bigint", "decimal(10,2)", "date", "array<int>" };
        HiveTableDescriptor tableDescriptor = new HiveTableDescriptor("testing23", columnNames, columnTypes);

        // NOTE(review): meaning of the trailing 'false' is not visible here - check the HiveTap constructor.
        Tap sink = new HiveTap(tableDescriptor, new HiveParquetScheme(tableDescriptor), SinkMode.REPLACE, false);

        // Custome1 is defined elsewhere in the project; it is declared to emit the "new" field,
        // and the output selector keeps a-e plus "new".
        Pipe head = new Pipe("pipe");
        Pipe assembly = new Each(head, inputFields, new Custome1(new Fields("new")),
                new Fields("a", "b", "c", "d", "e", "new"));

        // The same pipe is bound as both the source and the tail sink of the flow.
        FlowDef flowDef = FlowDef.flowDef().addSource(assembly, source).addTailSink(assembly, sink);

        new Hadoop2MR1FlowConnector(flowProperties).connect(flowDef).complete();

    }
}