Example usage for org.apache.hadoop.mapreduce Job getInstance

List of usage examples for org.apache.hadoop.mapreduce Job getInstance

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getInstance.

Prototype

@Deprecated
public static Job getInstance(Cluster ignored) throws IOException 

Source Link

Document

Creates a new Job with no particular Cluster.
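The prototype above is the deprecated Cluster overload; the examples below all use the Configuration overloads instead. As a quick orientation, here is a minimal, self-contained driver sketch, assuming an identity map-only job over text input (the class name and input/output arguments are illustrative and not taken from the examples on this page):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetInstanceExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Non-deprecated overload: bind the Job to this Configuration and name it in one call
        Job job = Job.getInstance(conf, "getInstance-example");
        job.setJarByClass(GetInstanceExample.class);
        job.setMapperClass(Mapper.class); // identity mapper, so this is a map-only copy job
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class); // identity mapper emits the TextInputFormat key/value types
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input path (placeholder argument)
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path must not already exist
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Job.getInstance() with no arguments and Job.getInstance(Configuration) are the other common non-deprecated entry points; most of the examples below use the single-argument form and set the job name separately.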

Usage

From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.HighWaterMarkInputFormatTest.java

License:Apache License

/**
 * Verify listing files before a maximum age.
 */
@Test
public void listStatusMaxFileAge() throws IOException {
    // Create temp files
    final File file1 = tempFolder.newFile("file1");
    Assert.assertTrue(file1.setLastModified(currentTimeMillis));

    final File file2 = tempFolder.newFile("file2");
    Assert.assertTrue(file2.setLastModified(currentTimeMillis - 2000));

    final File file3 = tempFolder.newFile("file3");
    Assert.assertTrue(file3.setLastModified(currentTimeMillis - 1000));

    // Test listing files with a maximum file age
    final Job job = Job.getInstance(new Configuration(false));
    HighWaterMarkInputFormat.setInputPaths(job, tempFolder.getRoot().getAbsolutePath());
    HighWaterMarkInputFormat.setMaxFileAge(job, 1000);

    final HighWaterMarkInputFormat inputFormat = new MockHighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    Collections.sort(files, new FileStatusComparator());
    Assert.assertEquals(new Path(file1.toURI()), files.get(0).getPath());
    Assert.assertEquals(new Path(file3.toURI()), files.get(1).getPath());
    Assert.assertEquals(2, files.size());
}

From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.HighWaterMarkInputFormatTest.java

License:Apache License

/**
 * Verify listing files after a minimum age.
 */
@Test
public void listStatusMinFileAge() throws IOException {
    // Create temp files
    final File file1 = tempFolder.newFile("file1");
    Assert.assertTrue(file1.setLastModified(currentTimeMillis));

    final File file2 = tempFolder.newFile("file2");
    Assert.assertTrue(file2.setLastModified(currentTimeMillis - 2000));

    final File file3 = tempFolder.newFile("file3");
    Assert.assertTrue(file3.setLastModified(currentTimeMillis - 1000));

    // Test listing files with a minimum file age
    final Job job = Job.getInstance(new Configuration(false));
    HighWaterMarkInputFormat.setInputPaths(job, tempFolder.getRoot().getAbsolutePath());
    HighWaterMarkInputFormat.setMinFileAge(job, 1000);

    final HighWaterMarkInputFormat inputFormat = new MockHighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    Collections.sort(files, new FileStatusComparator());
    Assert.assertEquals(new Path(file2.toURI()), files.get(0).getPath());
    Assert.assertEquals(new Path(file3.toURI()), files.get(1).getPath());
    Assert.assertEquals(2, files.size());
}

From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.HighWaterMarkInputFormatTest.java

License:Apache License

@Test(expected = IOException.class)
public void listStatusMinAfterMax() throws IOException {
    final Job job = Job.getInstance(new Configuration(false));
    HighWaterMarkInputFormat.setMaxFileAge(job, 0);
    HighWaterMarkInputFormat.setMinFileAge(job, 1);

    final HighWaterMarkInputFormat inputFormat = new MockHighWaterMarkInputFormat();
    inputFormat.listStatus(job);
}

From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.SparkDataSetContext.java

License:Apache License

/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException          if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
    // Get configuration
    final Configuration conf = delegate.getHadoopConfiguration(client);
    final String highWaterMarkName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK),
            SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
    final Job job = Job.getInstance(conf);

    final String highWaterMarkValue = client.getHighWaterMarks().get(highWaterMarkName);
    if (highWaterMarkValue != null) {
        try {
            HighWaterMarkInputFormat.setHighWaterMark(job, Long.parseLong(highWaterMarkValue));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException(
                    "Invalid " + HIGH_WATER_MARK_OPTION + " value: " + highWaterMarkValue, e);
        }
    }

    final String maxFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE),
            SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
    if (maxFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMaxFileAge(job, Long.parseLong(maxFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MAX_AGE_OPTION + " value: " + maxFileAge, e);
        }
    }

    final String minFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE),
            SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
    if (minFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMinFileAge(job, Long.parseLong(minFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MIN_AGE_OPTION + " value: " + minFileAge, e);
        }
    }

    // Convert URIs to Paths
    final Path[] paths = new Path[uris.size()];

    for (int i = 0; i < uris.size(); ++i) {
        final Path path = new Path(uris.get(i));
        final FileSystem fs = path.getFileSystem(conf);
        paths[i] = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    }

    HighWaterMarkInputFormat.setInputPaths(job, paths);

    // Get high water mark paths
    final HighWaterMarkInputFormat inputFormat = new HighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    client.setHighWaterMarks(
            Collections.singletonMap(highWaterMarkName, Long.toString(inputFormat.getLastHighWaterMark())));

    // Return resolved paths
    final List<String> resolvedPaths = new ArrayList<>(files.size());

    if (files.isEmpty()) {
        resolvedPaths.add("file:/dev/null");
    } else {
        for (final FileStatus file : files) {
            resolvedPaths.add(file.getPath().toString());
        }
    }

    return resolvedPaths;
}

From source file:com.trexinhca.TrexinHCATest.java

License:Apache License

public static void main(String[] args) throws Exception {

    ks = KieServices.Factory.get();
    kContainer = ks.getKieClasspathContainer();
    ksession = TrexinHCATest.kContainer.newKieSession("MapReduceKS");
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: TrexinHCATest <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf);
    job.setJobName("HCATest");
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(TrexinHCAReducer.class);
    job.setReducerClass(TrexinHCAReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    job.setJarByClass(TrexinHCATest.class);
    job.waitForCompletion(true);

}

From source file:com.uber.hoodie.hadoop.HoodieInputFormat.java

License:Apache License

@Override
public FileStatus[] listStatus(JobConf job) throws IOException {
    // Get all the file status from FileInputFormat and then do the filter
    FileStatus[] fileStatuses = super.listStatus(job);
    Map<HoodieTableMetaClient, List<FileStatus>> groupedFileStatus = groupFileStatus(fileStatuses);
    LOG.info("Found a total of " + groupedFileStatus.size() + " groups");
    List<FileStatus> returns = new ArrayList<>();
    for (Map.Entry<HoodieTableMetaClient, List<FileStatus>> entry : groupedFileStatus.entrySet()) {
        HoodieTableMetaClient metadata = entry.getKey();
        if (metadata == null) {
            // Add all the paths which are not hoodie specific
            returns.addAll(entry.getValue());
            continue;
        }

        FileStatus[] statuses = entry.getValue().toArray(new FileStatus[entry.getValue().size()]);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Hoodie Metadata initialized with completed commit Ts as :" + metadata);
        }
        String tableName = metadata.getTableConfig().getTableName();
        String mode = HoodieHiveUtil.readMode(Job.getInstance(job), tableName);
        // Get all commits, delta commits, compactions, as all of them produce a base parquet file
        // today
        HoodieTimeline timeline = metadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
        TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metadata, timeline,
                statuses);

        if (HoodieHiveUtil.INCREMENTAL_SCAN_MODE.equals(mode)) {
            // this is of the form commitTs_partition_sequenceNumber
            String lastIncrementalTs = HoodieHiveUtil.readStartCommitTime(Job.getInstance(job), tableName);
            // Total number of commits to return in this batch. Set this to -1 to get all the commits.
            Integer maxCommits = HoodieHiveUtil.readMaxCommits(Job.getInstance(job), tableName);
            LOG.info("Last Incremental timestamp was set as " + lastIncrementalTs);
            List<String> commitsToReturn = timeline.findInstantsAfter(lastIncrementalTs, maxCommits)
                    .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
            List<HoodieDataFile> filteredFiles = roView.getLatestDataFilesInRange(commitsToReturn)
                    .collect(Collectors.toList());
            for (HoodieDataFile filteredFile : filteredFiles) {
                LOG.info("Processing incremental hoodie file - " + filteredFile.getPath());
                filteredFile = checkFileStatus(filteredFile);
                returns.add(filteredFile.getFileStatus());
            }
            LOG.info("Total paths to process after hoodie incremental filter " + filteredFiles.size());
        } else {
            // filter files on the latest commit found
            List<HoodieDataFile> filteredFiles = roView.getLatestDataFiles().collect(Collectors.toList());
            LOG.info("Total paths to process after hoodie filter " + filteredFiles.size());
            for (HoodieDataFile filteredFile : filteredFiles) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Processing latest hoodie file - " + filteredFile.getPath());
                }
                filteredFile = checkFileStatus(filteredFile);
                returns.add(filteredFile.getFileStatus());
            }
        }
    }
    return returns.toArray(new FileStatus[returns.size()]);

}

From source file:com.uber.hoodie.utilities.HDFSParquetImporter.java

License:Apache License

protected JavaRDD<HoodieRecord<HoodieRecordPayload>> buildHoodieRecordsForImport(JavaSparkContext jsc,
        String schemaStr) throws IOException {
    Job job = Job.getInstance(jsc.hadoopConfiguration());
    // Allow recursive directories to be found
    job.getConfiguration().set(FileInputFormat.INPUT_DIR_RECURSIVE, "true");
    // To parallelize reading file status.
    job.getConfiguration().set(FileInputFormat.LIST_STATUS_NUM_THREADS, "1024");
    AvroReadSupport.setAvroReadSchema(jsc.hadoopConfiguration(), (new Schema.Parser().parse(schemaStr)));
    ParquetInputFormat.setReadSupportClass(job, (AvroReadSupport.class));

    return jsc
            .newAPIHadoopFile(cfg.srcPath, ParquetInputFormat.class, Void.class, GenericRecord.class,
                    job.getConfiguration())
            // To reduce large number of
            // tasks.
            .coalesce(16 * cfg.parallelism).map(entry -> {
                GenericRecord genericRecord = ((Tuple2<Void, GenericRecord>) entry)._2();
                Object partitionField = genericRecord.get(cfg.partitionKey);
                if (partitionField == null) {
                    throw new HoodieIOException("partition key is missing. :" + cfg.partitionKey);
                }
                Object rowField = genericRecord.get(cfg.rowKey);
                if (rowField == null) {
                    throw new HoodieIOException("row field is missing. :" + cfg.rowKey);
                }
                String partitionPath = partitionField.toString();
                logger.info("Row Key : " + rowField + ", Partition Path is (" + partitionPath + ")");
                if (partitionField instanceof Number) {
                    try {
                        long ts = (long) (Double.parseDouble(partitionField.toString()) * 1000L);
                        partitionPath = PARTITION_FORMATTER.format(new Date(ts));
                    } catch (NumberFormatException nfe) {
                        logger.warn("Unable to parse date from partition field. Assuming partition as ("
                                + partitionField + ")");
                    }
                }
                return new HoodieRecord<>(new HoodieKey((String) rowField, partitionPath),
                        new HoodieJsonPayload(genericRecord.toString()));
            });
}

From source file:com.wipro.ats.bdre.datagen.mr.Driver.java

License:Apache License

/**
 * @param args the cli arguments
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Configuration conf = getConf();
    GetGeneralConfig generalConfig = new GetGeneralConfig();
    GeneralConfig gc = generalConfig.byConigGroupAndKey("imconfig", "common.default-fs-name");
    conf.set("fs.defaultFS", gc.getDefaultVal());

    String processId = args[0];
    Path outputDir = new Path(ResolvePath.replaceVars(args[1]));

    Properties dataProps = Config.getDataProperties(processId);
    Properties tableProps = Config.getTableProperties(processId);

    TableUtil tableUtil = new TableUtil();
    Table table = tableUtil.formTableFromConfig(processId);
    FileSystem fs = FileSystem.get(conf);
    LOGGER.info("Default FS =" + conf.get("fs.defaultFS"));
    //set in the conf for mappers to use
    conf.set(Config.SEPARATOR_KEY, tableProps.getProperty("separator"));
    conf.set(Config.PID_KEY, processId);
    conf.setLong(Config.NUM_ROWS_KEY, Long.parseLong(dataProps.getProperty("numRows")));
    conf.setInt(Config.NUM_SPLITS_KEY, Integer.parseInt(dataProps.getProperty("numSplits")));

    Job job = Job.getInstance(conf);
    Path mrOutputPath = new Path(outputDir.toString() + "/MROUT/" + table.getTableName());

    FileOutputFormat.setOutputPath(job, mrOutputPath);
    job.setJobName("Datagen-" + table.getTableName());
    job.setJarByClass(Driver.class);
    job.setMapperClass(RecordGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.waitForCompletion(true);

    //merge and create a single file

    Path srcDir = mrOutputPath;
    Path destFile = new Path(outputDir.toString() + "/" + table.getTableName());
    FileUtil.copyMerge(fs, srcDir, fs, destFile, true, conf, "");

    //Return file info oozie params
    RegisterFileInfo registerFileInfo = new RegisterFileInfo();
    registerFileInfo.setBatchId(null);
    registerFileInfo.setCreationTs(new Timestamp(new Date().getTime()));
    registerFileInfo.setFileHash("0");
    registerFileInfo.setFileSize(0L);
    registerFileInfo.setPath(destFile.toString());
    registerFileInfo.setSubProcessId(Integer.parseInt(processId));
    OozieUtil oozieUtil = new OozieUtil();
    oozieUtil.persistBeanData(registerFileInfo, false);
    return 0;
}

From source file:com.wipro.ats.bdre.dq.DQDriver.java

License:Apache License

@Override
public int run(String[] arg) throws Exception {
    String processId = arg[0];
    String sPath = arg[1];
    String destDir = arg[2];

    Properties props = new GetProperties().getProperties(processId, "dq");
    LOGGER.debug("props=" + props);
    Configuration conf = getConf();

    conf.set("dq.process.id", processId);
    Job job = Job.getInstance(conf);
    job.setJobName("Data Quality " + processId);
    job.setJarByClass(DQDriver.class);
    job.setMapperClass(DQMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    //Reducer is not required
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    Path inputFilePath = new Path(sPath);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, removeIfExistAndSetOutputPath(conf, destDir));
    MultipleOutputs.addNamedOutput(job, DQConstants.GOOD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.BAD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.FILE_REPORT_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);

    if (!job.waitForCompletion(true)) {
        return 1;
    }

    Path outputDir = new Path(destDir);
    FileSystem srcFs = outputDir.getFileSystem(getConf());
    FileSystem destFs = outputDir.getFileSystem(getConf());

    //Valid Records
    Path goodFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_GOOD_RECORD_OUTPUT_DIR);
    //Input and quality filtered file should have same name (but different path)
    Path goodDestFile = new Path(destDir + "/" + inputFilePath.getName());
    if (srcFs.exists(goodFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, goodFilesSrcDir, destFs, goodDestFile, true, conf, "");
    }
    // Invalid Records
    Path badFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_BAD_RECORD_OUTPUT_DIR);
    Path badDestFile = new Path(destDir + "/" + DQConstants.BAD_RECORDS_FILE);
    if (srcFs.exists(badFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, badFilesSrcDir, destFs, badDestFile, true, conf, "");
    }

    // Preparing report aggregation job
    Job fileReportAggregationJob = Job.getInstance(conf);
    fileReportAggregationJob.setJobName("File Report Computing " + processId);
    fileReportAggregationJob.setJarByClass(DQMain.class);

    fileReportAggregationJob.setMapperClass(DQFileReportMapper.class);
    fileReportAggregationJob.setMapOutputKeyClass(Text.class);
    fileReportAggregationJob.setMapOutputValueClass(IntWritable.class);

    fileReportAggregationJob.setReducerClass(DQFileReportReducer.class);
    fileReportAggregationJob.setOutputKeyClass(Text.class);
    fileReportAggregationJob.setOutputValueClass(Text.class);

    fileReportAggregationJob.setNumReduceTasks(1);

    Path fileReportDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_REPORT_OUTPUT_DIR);
    Path fileReportOutputDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);

    FileInputFormat.addInputPath(fileReportAggregationJob, fileReportDir);
    FileOutputFormat.setOutputPath(fileReportAggregationJob, fileReportOutputDir);

    if (!fileReportAggregationJob.waitForCompletion(true)) {
        return 1;
    }

    // Merge Report Records MR stuffs
    Path reportsSrcDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);
    Path reportsDestFile = new Path(destDir + "/" + DQConstants.FILE_REPORT_FILE);
    FileUtil.copyMerge(srcFs, reportsSrcDir, destFs, reportsDestFile, true, conf, "");

    Path reportDestFile = new Path(outputDir.toString() + "/" + DQConstants.FILE_REPORT_FILE);
    //Read the report file from HDFS and report the percentage
    DQStats dqStats = getQualityStats(getConf(), reportDestFile);
    LOGGER.info("Percentage of good records :" + dqStats.getGoodPercent());
    props = new GetProperties().getProperties(processId, "dq");
    String strThreshold = props.getProperty("min.pass.threshold.percent");
    float threshold = Float.parseFloat(strThreshold);
    dqStats.setThreshold(threshold);
    //Update the result in metadata
    logResult(dqStats, processId, 0L);
    if (dqStats.getGoodPercent() < threshold) {
        LOGGER.error("DQ check did not pass");
        throw new DQValidationException(dqStats);
    }
    LOGGER.info(dqStats);
    FileChecksum hdfsChecksum = destFs.getFileChecksum(goodDestFile);
    String fileHash = hdfsChecksum == null ? "0" : hdfsChecksum.toString();
    //Return file info oozie params
    RegisterFileInfo registerFileInfo = new RegisterFileInfo();
    registerFileInfo.setBatchId(null);
    registerFileInfo.setCreationTs(new Timestamp(new Date().getTime()));
    registerFileInfo.setFileHash(fileHash);
    registerFileInfo.setFileSize(destFs.getFileStatus(goodDestFile).getLen());
    registerFileInfo.setPath(goodDestFile.toString());
    registerFileInfo.setSubProcessId(Integer.parseInt(processId));
    OozieUtil oozieUtil = new OozieUtil();
    oozieUtil.persistBeanData(registerFileInfo, false);

    return 0;
}

From source file:com.xoriant.kafkaProducer.MyConsumer.java

License:Apache License

public static void main(String[] args) throws IOException {
    // System.setProperty("spark.executor.memory", "8g");
    System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf();
    // final Configuration config = new Configuration();
    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set("mapreduce.output.textoutputformat.separator", ",");
    sparkConf.setMaster("local[2]");
    sparkConf.setAppName("Insurance");
    JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);

    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(javaSparkContext, new Duration(500));

    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
        topicMap.put(topic, numThreads);
    }

    // 3. create connection with HBase
    Configuration config = null;

    try {
        config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "192.168.1.114");
        config.set("hbase.zookeeper.property.clientPort", "2181");

        // config.set("mapreduce.job.output.key.class",
        // Text.class.getName());
        // config.set("mapreduce.job.output.value.class",
        // IntWritable.class.getName());
        // config.set("mapreduce.outputformat.class" ,
        // TableOutputFormat.class.getName());
        // config.set("hbase.master", "127.0.0.1:60000");
        HBaseAdmin.checkHBaseAvailable(config);

        System.out.println("HBase is running!");
    } catch (MasterNotRunningException e) {
        System.out.println("HBase is not running!");
        System.exit(1);
    } catch (Exception ce) {
        System.out.println("here.....");
        ce.printStackTrace();
    }

    // config.set(TableInputFormat.INPUT_TABLE, rawTableName);

    // 4. new Hadoop API configuration
    final Job newAPIJobConfigurationState = Job.getInstance(config);
    newAPIJobConfigurationState.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, stateTable);
    newAPIJobConfigurationState.setOutputFormatClass(org.apache.hadoop.hbase.mapreduce.TableOutputFormat.class);

    final Job newAPIJobConfigurationUser = Job.getInstance(config);
    newAPIJobConfigurationUser.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "user_total_stream");
    newAPIJobConfigurationUser.setOutputFormatClass(org.apache.hadoop.hbase.mapreduce.TableOutputFormat.class);

    final Job paymentHistoryConfig = Job.getInstance(config);
    paymentHistoryConfig.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "payment_history_stream");
    paymentHistoryConfig.setOutputFormatClass(org.apache.hadoop.hbase.mapreduce.TableOutputFormat.class);
    /*
     * Set<String> topics = new HashSet<String>(); topics.add("test");
     * 
     * 
     * Map<String, String> kafkaParams = new HashMap<String, String>();
     * kafkaParams.put("metadata.broker.list", "10.20.0.199:9092");
     */
    /*
     * JavaPairInputDStream<String, String> stream = KafkaUtils
     * .createDirectStream(javaStreamingContext, String.class, String.class,
     * StringDecoder.class, StringDecoder.class, kafkaParams, topics);
     */

    JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(javaStreamingContext, args[0],
            args[1], topicMap);

    System.out.println(
            "Got my DStream! connecting to zookeeper " + args[0] + " group " + args[1] + " topics" + topicMap);

    stream.count().print();

    JavaDStream<Tuple11<String, String, String, String, String, String, String, String, String, String, String>> records = stream
            .map(new Function<Tuple2<String, String>, Tuple11<String, String, String, String, String, String, String, String, String, String, String>>() {

                private static final long serialVersionUID = 1L;

                public Tuple11<String, String, String, String, String, String, String, String, String, String, String> call(
                        Tuple2<String, String> defaultKeyAndRecords) throws Exception {

                    String[] fields = defaultKeyAndRecords._2().split(",");

                    return new Tuple11<String, String, String, String, String, String, String, String, String, String, String>(
                            fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6],
                            fields[7], fields[8], fields[9], fields[10]);
                }
            });

    records.foreachRDD(
            new Function<JavaRDD<Tuple11<String, String, String, String, String, String, String, String, String, String, String>>, Void>() {
                private static final long serialVersionUID = -3333697808496161495L;

                public Void call(
                        JavaRDD<Tuple11<String, String, String, String, String, String, String, String, String, String, String>> rdd)
                        throws Exception {
                    saveToHBasePaymentHistory(rdd, paymentHistoryConfig.getConfiguration());
                    return null;
                }
            });

    JavaPairDStream<String, String> window = records.mapToPair(
            new PairFunction<Tuple11<String, String, String, String, String, String, String, String, String, String, String>, String, String>() {

                private static final long serialVersionUID = -8849699432349098738L;

                public Tuple2<String, String> call(
                        Tuple11<String, String, String, String, String, String, String, String, String, String, String> arg0)
                        throws Exception {

                    String str = arg0._2() + "," + arg0._3() + "," + arg0._4() + "," + arg0._5() + ","
                            + arg0._6() + "," + arg0._7() + "," + arg0._8() + "," + arg0._9() + "," + arg0._10()
                            + "," + arg0._11();

                    return new Tuple2<String, String>(arg0._1(), str);
                }
            }).window(new Duration(60000), new Duration(60000));

    window.saveAsNewAPIHadoopFiles("hdfs://192.168.1.114/user/hadoop/StreamingData/Insurancedata", "",
            Text.class, Text.class, TextOutputFormat.class, hadoopConfig);

    JavaPairDStream<String, Integer> recordsMapState = records.mapToPair(
            new PairFunction<Tuple11<String, String, String, String, String, String, String, String, String, String, String>, String, Integer>() {
                private static final long serialVersionUID = 1L;

                public Tuple2<String, Integer> call(
                        Tuple11<String, String, String, String, String, String, String, String, String, String, String> arg0)
                        throws Exception {
                    String key = arg0._10();
                    Integer value = new Integer(arg0._7());

                    return new Tuple2<String, Integer>(key, value);
                }

            });

    JavaPairDStream<String, Integer> recordsMapUser = records.mapToPair(
            new PairFunction<Tuple11<String, String, String, String, String, String, String, String, String, String, String>, String, Integer>() {
                private static final long serialVersionUID = 1L;

                public Tuple2<String, Integer> call(
                        Tuple11<String, String, String, String, String, String, String, String, String, String, String> arg0)
                        throws Exception {
                    String key = arg0._1();
                    Integer value = new Integer(arg0._7());

                    return new Tuple2<String, Integer>(key, value);
                }

            });

    JavaPairDStream<String, Integer> reduceByKeyAndWindowState = recordsMapState
            .reduceByKeyAndWindow(new Function2<Integer, Integer, Integer>() {
                private static final long serialVersionUID = 197675516004789269L;

                public Integer call(Integer val1, Integer val2) throws Exception {
                    return val1 + val2;

                }
            }, new Duration(86400000), new Duration(10000));

    JavaPairDStream<String, Integer> reduceByKeyAndWindowUser = recordsMapUser
            .reduceByKeyAndWindow(new Function2<Integer, Integer, Integer>() {
                private static final long serialVersionUID = 197675516004789269L;

                public Integer call(Integer val1, Integer val2) throws Exception {
                    return val1 + val2;

                }
            }, new Duration(86400000), new Duration(60000));

    // reduce.count();
    reduceByKeyAndWindowState.print();

    reduceByKeyAndWindowState.foreachRDD(new Function<JavaPairRDD<String, Integer>, Void>() {
        private static final long serialVersionUID = 8534726505385048702L;

        public Void call(JavaPairRDD<String, Integer> rdd) throws Exception {
            saveToHBase(rdd, newAPIJobConfigurationState.getConfiguration());
            return null;
        }
    });

    reduceByKeyAndWindowUser.foreachRDD(new Function<JavaPairRDD<String, Integer>, Void>() {
        private static final long serialVersionUID = 8534726505385048702L;

        public Void call(JavaPairRDD<String, Integer> rdd) throws Exception {
            saveToHBase(rdd, newAPIJobConfigurationUser.getConfiguration());
            return null;
        }
    });

    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}