Java tutorial: integration-testing a Kafka -> Storm -> HDFS topology with in-process mini-clusters
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.github.sakserv.storm;

import backtype.storm.Config;
import backtype.storm.topology.TopologyBuilder;
import com.github.sakserv.kafka.KafkaProducerTest;
import com.github.sakserv.minicluster.impl.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.thrift.TException;
import org.codehaus.jettison.json.JSONException;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.SQLException;

public class KafkaHiveHdfsTopologyTest {

    // Kafka static
    private static final String DEFAULT_LOG_DIR = "embedded_kafka";
    private static final String TEST_TOPIC = "test-topic";
    private static final Integer KAFKA_PORT = 9092;
    private static final String LOCALHOST_BROKER = "localhost:" + KAFKA_PORT.toString();
    private static final Integer BROKER_ID = 1;

    // Storm static
    private static final String TEST_TOPOLOGY_NAME = "test";

    // HDFS static
    private static final String HDFS_OUTPUT_DIR = "/tmp/kafka_data";

    // Zookeeper
    private ZookeeperLocalCluster zkCluster;

    // Kafka
    private KafkaLocalBroker kafkaCluster;

    // Storm
    private StormLocalCluster stormCluster;

    // HDFS
    private HdfsLocalCluster hdfsCluster;

    @Before
    public void setUp() {
        // Start ZK
        zkCluster = new ZookeeperLocalCluster();
        zkCluster.start();

        // Start HDFS
        hdfsCluster = new HdfsLocalCluster();
        hdfsCluster.start();

        // Start Kafka
        kafkaCluster = new KafkaLocalBroker(TEST_TOPIC, DEFAULT_LOG_DIR, KAFKA_PORT, BROKER_ID,
                zkCluster.getZkConnectionString());
        kafkaCluster.start();

        // Start Storm
        stormCluster = new StormLocalCluster(zkCluster.getZkHostName(),
                Long.parseLong(zkCluster.getZkPort()));
        stormCluster.start();
    }

    @After
    public void tearDown() {
        // Stop Storm
        try {
            stormCluster.stop(TEST_TOPOLOGY_NAME);
        } catch (IllegalStateException e) {
            // The topology was already stopped by the test itself; safe to ignore
        }

        // Stop Kafka
        kafkaCluster.stop(true);

        // Stop HDFS
        hdfsCluster.stop(true);

        // Stop ZK
        zkCluster.stop(true);
    }

    public void runStormKafkaHiveHdfsTopology() {
        System.out.println("STORM: Starting Topology: " + TEST_TOPOLOGY_NAME);
        TopologyBuilder builder = new TopologyBuilder();
        ConfigureKafkaSpout.configureKafkaSpout(builder, zkCluster.getZkConnectionString(),
                TEST_TOPIC, "-2");
        ConfigureHdfsBolt.configureHdfsBolt(builder, ",", HDFS_OUTPUT_DIR,
                hdfsCluster.getHdfsUriString());
        stormCluster.submitTopology(TEST_TOPOLOGY_NAME, new Config(), builder.createTopology());
    }

    public void validateHdfsResults() throws IOException {
        System.out.println("HDFS: VALIDATING");
        FileSystem hdfsFsHandle = hdfsCluster.getHdfsFileSystemHandle();
        RemoteIterator<LocatedFileStatus> listFiles =
                hdfsFsHandle.listFiles(new Path(HDFS_OUTPUT_DIR), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus file = listFiles.next();
            System.out.println("HDFS READ: Found File: " + file);
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(hdfsFsHandle.open(file.getPath())));
            String line = br.readLine();
            while (line != null) {
                System.out.println("HDFS READ: Found Line: " + line);
                line = br.readLine();
            }
        }
        hdfsFsHandle.close();
    }

    @Test
    public void testKafkaHiveHdfsTopology() throws TException, JSONException,
            ClassNotFoundException, SQLException, IOException {
        // Produce test messages to Kafka and start the kafka-hive-hdfs Storm topology,
        // then sleep 10 seconds to let processing complete
        KafkaProducerTest.produceMessages(LOCALHOST_BROKER, TEST_TOPIC, 50);
        runStormKafkaHiveHdfsTopology();
        try {
            Thread.sleep(10000L);
        } catch (InterruptedException e) {
            System.exit(1);
        }

        // To ensure transactions and files are closed, stop Storm
        stormCluster.stop(TEST_TOPOLOGY_NAME);
        try {
            Thread.sleep(10000L);
        } catch (InterruptedException e) {
            System.exit(1);
        }

        // Validate that the HDFS files exist
        validateHdfsResults();
        try {
            Thread.sleep(10000L);
        } catch (InterruptedException e) {
            System.exit(1);
        }
    }
}
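The topology wiring above delegates to ConfigureKafkaSpout and ConfigureHdfsBolt, which live elsewhere in the com.github.sakserv.storm package and are not shown here. The sketches below illustrate one plausible implementation, assuming the storm-kafka SpoutConfig/KafkaSpout and storm-hdfs HdfsBolt APIs of this Storm (backtype.storm) era; the component ids, zkRoot, sync policy, and rotation policy are illustrative assumptions, not the project's actual values. The "-2" passed in corresponds to Kafka's "earliest available offset" sentinel.

package com.github.sakserv.storm;

import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.topology.TopologyBuilder;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

public class ConfigureKafkaSpout {
    public static void configureKafkaSpout(TopologyBuilder builder, String zkConnString,
                                           String topic, String startOffset) {
        // zkRoot and consumer id are assumptions for illustration
        SpoutConfig spoutConfig = new SpoutConfig(new ZkHosts(zkConnString), topic,
                "/kafka_spout", "kafkaspout");
        spoutConfig.startOffsetTime = Long.parseLong(startOffset); // "-2" = earliest offset
        spoutConfig.forceFromStart = true;                         // ignore previously committed offsets
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
        builder.setSpout("kafkaspout", new KafkaSpout(spoutConfig), 1);
    }
}

package com.github.sakserv.storm;

import backtype.storm.topology.TopologyBuilder;
import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.RecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy.Units;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;

public class ConfigureHdfsBolt {
    public static void configureHdfsBolt(TopologyBuilder builder, String delimiter,
                                         String outputPath, String hdfsUri) {
        RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter(delimiter);
        SyncPolicy syncPolicy = new CountSyncPolicy(10);              // sync to HDFS every 10 tuples
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(5.0f, Units.MB);
        FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(outputPath);
        HdfsBolt bolt = new HdfsBolt()
                .withFsUrl(hdfsUri)
                .withFileNameFormat(fileNameFormat)
                .withRecordFormat(format)
                .withRotationPolicy(rotationPolicy)
                .withSyncPolicy(syncPolicy);
        builder.setBolt("hdfsbolt", bolt, 1).shuffleGrouping("kafkaspout");
    }
}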
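The test also seeds the topic through KafkaProducerTest.produceMessages, another helper that is not shown. Below is a minimal sketch of what such a helper could look like, built on the Kafka 0.8 kafka.javaapi.producer API that matches the embedded broker used here; the comma-delimited payload format is an assumption, chosen only to match the "," field delimiter handed to the HDFS bolt.

package com.github.sakserv.kafka;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

import java.util.Properties;

public class KafkaProducerTest {

    // Publishes msgCount string messages to the given topic so the topology
    // has data to move from Kafka into HDFS.
    public static void produceMessages(String brokerList, String topic, int msgCount) {
        Properties props = new Properties();
        props.put("metadata.broker.list", brokerList);                // e.g. "localhost:9092"
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("request.required.acks", "1");                      // wait for the leader's ack

        Producer<String, String> producer =
                new Producer<String, String>(new ProducerConfig(props));
        for (int i = 0; i < msgCount; i++) {
            // Payload shape is an assumption; it matches the bolt's "," delimiter
            String payload = "id_" + i + ",message_" + i;
            producer.send(new KeyedMessage<String, String>(topic, Integer.toString(i), payload));
        }
        producer.close();
    }
}

With these pieces in place, the test flow reads straight through: produce 50 messages, submit the topology, give it time to drain Kafka into HDFS, stop the topology so the bolt flushes and closes its files, then walk the output directory and print every line found.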