List of usage examples for org.apache.hadoop.conf.Configuration.set
public void set(String name, String value)
Sets the value of the name property.
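Before the collected examples below, here is a minimal, self-contained sketch of the set/get round trip on a plain Configuration; the property names are made up for illustration.

import org.apache.hadoop.conf.Configuration;

public class ConfigurationSetExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Store a String value under a property name (hypothetical name, for illustration only).
    conf.set("my.app.setting", "some-value");
    // Read it back; the two-argument get supplies a default for missing keys.
    String value = conf.get("my.app.setting");
    String missing = conf.get("not.set.anywhere", "fallback");
    System.out.println(value + " / " + missing); // prints: some-value / fallback
  }
}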
From source file:co.cask.cdap.test.ConfigurableTestBase.java
License:Apache License
private static void initialize(@Nullable Map<String, String> additionalConfiguration) throws Exception {
  if (startCount++ > 0) {
    return;
  }
  File localDataDir = tmpFolder.newFolder();
  cConf = createCConf(localDataDir, additionalConfiguration);
  org.apache.hadoop.conf.Configuration hConf = new org.apache.hadoop.conf.Configuration();
  hConf.addResource("mapred-site-local.xml");
  hConf.reloadConfiguration();
  hConf.set(Constants.CFG_LOCAL_DATA_DIR, localDataDir.getAbsolutePath());
  hConf.set(Constants.AppFabric.OUTPUT_DIR, cConf.get(Constants.AppFabric.OUTPUT_DIR));
  hConf.set("hadoop.tmp.dir",
            new File(localDataDir, cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsolutePath());

  // Windows specific requirements
  if (OSDetector.isWindows()) {
    File tmpDir = tmpFolder.newFolder();
    File binDir = new File(tmpDir, "bin");
    Assert.assertTrue(binDir.mkdirs());
    copyTempFile("hadoop.dll", tmpDir);
    copyTempFile("winutils.exe", binDir);
    System.setProperty("hadoop.home.dir", tmpDir.getAbsolutePath());
    System.load(new File(tmpDir, "hadoop.dll").getAbsolutePath());
  }

  Injector injector = Guice.createInjector(createDataFabricModule(),
      new DataSetsModules().getStandaloneModules(), new DataSetServiceModules().getInMemoryModules(),
      new ConfigModule(cConf, hConf), new IOModule(), new LocationRuntimeModule().getInMemoryModules(),
      new DiscoveryRuntimeModule().getInMemoryModules(),
      new AppFabricServiceRuntimeModule().getInMemoryModules(),
      new ServiceStoreModules().getInMemoryModules(),
      new InMemoryProgramRunnerModule(LocalStreamWriter.class), new AbstractModule() {
        @Override
        protected void configure() {
          bind(StreamHandler.class).in(Scopes.SINGLETON);
          bind(StreamFetchHandler.class).in(Scopes.SINGLETON);
          bind(AbstractNamespaceClient.class).to(LocalNamespaceClient.class).in(Scopes.SINGLETON);
          bind(StreamFileJanitorService.class).to(LocalStreamFileJanitorService.class).in(Scopes.SINGLETON);
          bind(StreamWriterSizeCollector.class).to(BasicStreamWriterSizeCollector.class).in(Scopes.SINGLETON);
          bind(StreamCoordinatorClient.class).to(InMemoryStreamCoordinatorClient.class).in(Scopes.SINGLETON);
        }
      },
      // todo: do we need handler?
      new MetricsHandlerModule(), new MetricsClientRuntimeModule().getInMemoryModules(),
      new LoggingModules().getInMemoryModules(), new ExploreRuntimeModule().getInMemoryModules(),
      new ExploreClientModule(), new NotificationFeedServiceRuntimeModule().getInMemoryModules(),
      new NotificationServiceRuntimeModule().getInMemoryModules(), new AbstractModule() {
        @Override
        @SuppressWarnings("deprecation")
        protected void configure() {
          install(new FactoryModuleBuilder()
                      .implement(ApplicationManager.class, DefaultApplicationManager.class)
                      .build(ApplicationManagerFactory.class));
          install(new FactoryModuleBuilder()
                      .implement(StreamWriter.class, DefaultStreamWriter.class)
                      .build(StreamWriterFactory.class));
          install(new FactoryModuleBuilder()
                      .implement(StreamManager.class, DefaultStreamManager.class)
                      .build(StreamManagerFactory.class));
          bind(TemporaryFolder.class).toInstance(tmpFolder);
        }
      });
  txService = injector.getInstance(TransactionManager.class);
  txService.startAndWait();
  dsOpService = injector.getInstance(DatasetOpExecutor.class);
  dsOpService.startAndWait();
  datasetService = injector.getInstance(DatasetService.class);
  datasetService.startAndWait();
  metricsQueryService = injector.getInstance(MetricsQueryService.class);
  metricsQueryService.startAndWait();
  metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
  metricsCollectionService.startAndWait();
  schedulerService = injector.getInstance(SchedulerService.class);
  schedulerService.startAndWait();
  if (cConf.getBoolean(Constants.Explore.EXPLORE_ENABLED)) {
    exploreExecutorService = injector.getInstance(ExploreExecutorService.class);
    exploreExecutorService.startAndWait();
    exploreClient = injector.getInstance(ExploreClient.class);
  }
  streamCoordinatorClient = injector.getInstance(StreamCoordinatorClient.class);
  streamCoordinatorClient.startAndWait();
  testManager = injector.getInstance(UnitTestManager.class);
  namespaceAdmin = injector.getInstance(NamespaceAdmin.class);
  // we use MetricStore directly, until RuntimeStats API changes
  RuntimeStats.metricStore = injector.getInstance(MetricStore.class);
  namespaceAdmin = injector.getInstance(NamespaceAdmin.class);
  namespaceAdmin.createNamespace(Constants.DEFAULT_NAMESPACE_META);
}
From source file:co.cask.cdap.test.TestBase.java
License:Apache License
@BeforeClass
public static void initialize() throws Exception {
  if (startCount++ > 0) {
    return;
  }
  File localDataDir = TMP_FOLDER.newFolder();
  cConf = createCConf(localDataDir);
  org.apache.hadoop.conf.Configuration hConf = new org.apache.hadoop.conf.Configuration();
  hConf.addResource("mapred-site-local.xml");
  hConf.reloadConfiguration();
  hConf.set(Constants.CFG_LOCAL_DATA_DIR, localDataDir.getAbsolutePath());
  hConf.set(Constants.AppFabric.OUTPUT_DIR, cConf.get(Constants.AppFabric.OUTPUT_DIR));
  hConf.set("hadoop.tmp.dir",
            new File(localDataDir, cConf.get(Constants.AppFabric.TEMP_DIR)).getAbsolutePath());

  // Windows specific requirements
  if (OSDetector.isWindows()) {
    File tmpDir = TMP_FOLDER.newFolder();
    File binDir = new File(tmpDir, "bin");
    Assert.assertTrue(binDir.mkdirs());
    copyTempFile("hadoop.dll", tmpDir);
    copyTempFile("winutils.exe", binDir);
    System.setProperty("hadoop.home.dir", tmpDir.getAbsolutePath());
    System.load(new File(tmpDir, "hadoop.dll").getAbsolutePath());
  }

  Injector injector = Guice.createInjector(createDataFabricModule(), new TransactionExecutorModule(),
      new DataSetsModules().getStandaloneModules(), new DataSetServiceModules().getInMemoryModules(),
      new ConfigModule(cConf, hConf), new IOModule(), new LocationRuntimeModule().getInMemoryModules(),
      new DiscoveryRuntimeModule().getInMemoryModules(),
      new AppFabricServiceRuntimeModule().getInMemoryModules(),
      new ServiceStoreModules().getInMemoryModules(),
      new InMemoryProgramRunnerModule(LocalStreamWriter.class), new AbstractModule() {
        @Override
        protected void configure() {
          bind(StreamHandler.class).in(Scopes.SINGLETON);
          bind(StreamFetchHandler.class).in(Scopes.SINGLETON);
          bind(StreamViewHttpHandler.class).in(Scopes.SINGLETON);
          bind(StreamFileJanitorService.class).to(LocalStreamFileJanitorService.class).in(Scopes.SINGLETON);
          bind(StreamWriterSizeCollector.class).to(BasicStreamWriterSizeCollector.class).in(Scopes.SINGLETON);
          bind(StreamCoordinatorClient.class).to(InMemoryStreamCoordinatorClient.class).in(Scopes.SINGLETON);
          bind(MetricsManager.class).toProvider(MetricsManagerProvider.class);
        }
      },
      // todo: do we need handler?
      new MetricsHandlerModule(), new MetricsClientRuntimeModule().getInMemoryModules(),
      new LoggingModules().getInMemoryModules(), new ExploreRuntimeModule().getInMemoryModules(),
      new ExploreClientModule(), new NotificationFeedServiceRuntimeModule().getInMemoryModules(),
      new NotificationServiceRuntimeModule().getInMemoryModules(),
      new NamespaceClientRuntimeModule().getStandaloneModules(),
      new NamespaceStoreModule().getStandaloneModules(), new AuthorizationModule(),
      new AbstractModule() {
        @Override
        @SuppressWarnings("deprecation")
        protected void configure() {
          install(new FactoryModuleBuilder()
                      .implement(ApplicationManager.class, DefaultApplicationManager.class)
                      .build(ApplicationManagerFactory.class));
          install(new FactoryModuleBuilder()
                      .implement(ArtifactManager.class, DefaultArtifactManager.class)
                      .build(ArtifactManagerFactory.class));
          install(new FactoryModuleBuilder()
                      .implement(StreamManager.class, DefaultStreamManager.class)
                      .build(StreamManagerFactory.class));
          bind(TemporaryFolder.class).toInstance(TMP_FOLDER);
          bind(AuthorizationHandler.class).in(Scopes.SINGLETON);
        }
      });
  txService = injector.getInstance(TransactionManager.class);
  txService.startAndWait();
  dsOpService = injector.getInstance(DatasetOpExecutor.class);
  dsOpService.startAndWait();
  datasetService = injector.getInstance(DatasetService.class);
  datasetService.startAndWait();
  metricsQueryService = injector.getInstance(MetricsQueryService.class);
  metricsQueryService.startAndWait();
  metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
  metricsCollectionService.startAndWait();
  schedulerService = injector.getInstance(SchedulerService.class);
  schedulerService.startAndWait();
  if (cConf.getBoolean(Constants.Explore.EXPLORE_ENABLED)) {
    exploreExecutorService = injector.getInstance(ExploreExecutorService.class);
    exploreExecutorService.startAndWait();
    exploreClient = injector.getInstance(ExploreClient.class);
  }
  streamCoordinatorClient = injector.getInstance(StreamCoordinatorClient.class);
  streamCoordinatorClient.startAndWait();
  testManager = injector.getInstance(UnitTestManager.class);
  metricsManager = injector.getInstance(MetricsManager.class);
  authorizerInstantiatorService = injector.getInstance(AuthorizerInstantiatorService.class);
  authorizerInstantiatorService.startAndWait();
  // This is needed so the logged-in user can successfully create the default namespace
  if (cConf.getBoolean(Constants.Security.Authorization.ENABLED)) {
    InstanceId instance = new InstanceId(cConf.get(Constants.INSTANCE_NAME));
    Principal principal = new Principal(SecurityRequestContext.getUserId(), Principal.PrincipalType.USER);
    authorizerInstantiatorService.get().grant(instance, principal, ImmutableSet.of(Action.ADMIN));
  }
  namespaceAdmin = injector.getInstance(NamespaceAdmin.class);
  namespaceAdmin.create(NamespaceMeta.DEFAULT);
}
From source file:co.cask.hydrator.plugin.batch.ETLMapReduceTestRun.java
License:Apache License
@Test
public void testS3toTPFS() throws Exception {
  String testPath = "s3n://test/";
  String testFile1 = "2015-06-17-00-00-00.txt";
  String testData1 = "Sample data for testing.";
  String testFile2 = "abc.txt";
  String testData2 = "Sample data for testing.";

  S3NInMemoryFileSystem fs = new S3NInMemoryFileSystem();
  Configuration conf = new Configuration();
  conf.set("fs.s3n.impl", S3NInMemoryFileSystem.class.getName());
  fs.initialize(URI.create("s3n://test/"), conf);
  fs.createNewFile(new Path(testPath));

  try (FSDataOutputStream fos1 = fs.create(new Path(testPath + testFile1))) {
    fos1.write(testData1.getBytes());
    fos1.flush();
  }
  try (FSDataOutputStream fos2 = fs.create(new Path(testPath + testFile2))) {
    fos2.write(testData2.getBytes());
    fos2.flush();
  }

  Method method = FileSystem.class.getDeclaredMethod("addFileSystemForTesting", URI.class, Configuration.class,
                                                     FileSystem.class);
  method.setAccessible(true);
  method.invoke(FileSystem.class, URI.create("s3n://test/"), conf, fs);

  ETLStage source = new ETLStage("source", new ETLPlugin("S3", BatchSource.PLUGIN_TYPE,
      ImmutableMap.<String, String>builder()
          .put(Constants.Reference.REFERENCE_NAME, "S3TestSource")
          .put(Properties.S3.ACCESS_KEY, "key")
          .put(Properties.S3.ACCESS_ID, "ID")
          .put(Properties.S3.PATH, testPath)
          .put(Properties.S3.FILE_REGEX, "abc.*")
          .build(),
      null));
  ETLStage sink = new ETLStage("sink", new ETLPlugin("TPFSAvro", BatchSink.PLUGIN_TYPE,
      ImmutableMap.of(Properties.TimePartitionedFileSetDataset.SCHEMA, FileBatchSource.DEFAULT_SCHEMA.toString(),
                      Properties.TimePartitionedFileSetDataset.TPFS_NAME, "TPFSsink"),
      null));

  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
      .addStage(source)
      .addStage(sink)
      .addConnection(source.getName(), sink.getName())
      .build();
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
  Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "S3ToTPFS");
  ApplicationManager appManager = deployApplication(appId, appRequest);

  MapReduceManager mrManager = appManager.getMapReduceManager(ETLMapReduce.NAME);
  mrManager.start();
  mrManager.waitForFinish(2, TimeUnit.MINUTES);

  DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset("TPFSsink");
  try (TimePartitionedFileSet fileSet = fileSetManager.get()) {
    List<GenericRecord> records = readOutput(fileSet, FileBatchSource.DEFAULT_SCHEMA);
    // Two input files, each with one input record were specified. However, only one file matches the regex,
    // so only one record should be found in the output.
    Assert.assertEquals(1, records.size());
    Assert.assertEquals(testData1, records.get(0).get("body").toString());
  }
}
From source file:co.cask.hydrator.plugin.batch.source.BatchCassandraSource.java
License:Apache License
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  Configuration conf = new Configuration();
  conf.clear();
  ConfigHelper.setInputColumnFamily(conf, config.keyspace, config.columnFamily);
  ConfigHelper.setInputInitialAddress(conf, config.initialAddress);
  ConfigHelper.setInputPartitioner(conf, config.partitioner);
  ConfigHelper.setInputRpcPort(conf, (config.port == null) ? "9160" : Integer.toString(config.port));
  Preconditions.checkArgument(!(Strings.isNullOrEmpty(config.username) ^ Strings.isNullOrEmpty(config.password)),
      "You must either set both username and password or neither username nor password. "
          + "Currently, they are username: " + config.username + " and password: " + config.password);
  if (!Strings.isNullOrEmpty(config.username)) {
    ConfigHelper.setInputKeyspaceUserNameAndPassword(conf, config.username, config.password);
  }
  if (!Strings.isNullOrEmpty(config.properties)) {
    for (String pair : config.properties.split(",")) {
      // the key and value of properties might have spaces so remove only leading and trailing ones
      conf.set(CharMatcher.WHITESPACE.trimFrom(pair.split(":")[0]),
               CharMatcher.WHITESPACE.trimFrom(pair.split(":")[1]));
    }
  }
  CqlConfigHelper.setInputCql(conf, config.query);
  context.setInput(Input.of(config.referenceName, new SourceInputFormatProvider(CqlInputFormat.class, conf)));
}
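The properties loop above is a common pattern for pushing user-supplied "key1:value1,key2:value2" pairs into a Configuration. A small standalone sketch of the same idea, trimming only leading and trailing whitespace; the helper and class names are illustrative, and splitting each pair with a limit of 2 is an assumption made here so that values may contain a colon.

import com.google.common.base.CharMatcher;
import org.apache.hadoop.conf.Configuration;

public final class PropertyPairs {
  private PropertyPairs() { }

  // Copy "key1:value1,key2:value2" style pairs into a Configuration.
  public static void setPairs(Configuration conf, String pairs) {
    for (String pair : pairs.split(",")) {
      String[] parts = pair.split(":", 2); // limit of 2 so the value may itself contain ':'
      conf.set(CharMatcher.WHITESPACE.trimFrom(parts[0]),
               CharMatcher.WHITESPACE.trimFrom(parts[1]));
    }
  }
}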
From source file:co.cask.hydrator.plugin.batch.source.ExcelInputFormat.java
License:Apache License
public static void setConfigurations(Job job, String filePattern, String sheetName, boolean reprocess,
                                     int sheetNo, String columnList, boolean skipFirstRow,
                                     String terminateIfEmptyRow, String rowLimit, String ifErrorRecord,
                                     String processedFiles) {
  Configuration configuration = job.getConfiguration();
  configuration.set(FILE_PATTERN, filePattern);
  configuration.set(SHEET_NAME, sheetName);
  configuration.setBoolean(RE_PROCESS, reprocess);
  configuration.setInt(SHEET_NO, sheetNo);
  configuration.set(COLUMN_LIST, columnList);
  configuration.setBoolean(SKIP_FIRST_ROW, skipFirstRow);
  configuration.set(TERMINATE_IF_EMPTY_ROW, terminateIfEmptyRow);
  if (!Strings.isNullOrEmpty(rowLimit)) {
    configuration.set(ROWS_LIMIT, rowLimit);
  }
  configuration.set(IF_ERROR_RECORD, ifErrorRecord);
  configuration.set(PROCESSED_FILES, processedFiles);
}
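For completeness, a hedged sketch of how the values written by setConfigurations could be read back on the task side, for example from the corresponding RecordReader's configuration. It assumes the same constants are visible there, and the defaults shown are illustrative, not taken from the plugin.

import org.apache.hadoop.conf.Configuration;

// Sketch only: reading the values back, e.g. from TaskAttemptContext.getConfiguration().
static void readExcelSettings(Configuration configuration) {
  String sheetName = configuration.get(SHEET_NAME);
  boolean reprocess = configuration.getBoolean(RE_PROCESS, false);      // illustrative default
  int sheetNo = configuration.getInt(SHEET_NO, -1);                     // illustrative default
  boolean skipFirstRow = configuration.getBoolean(SKIP_FIRST_ROW, false);
  String rowLimit = configuration.get(ROWS_LIMIT);                      // null when the optional limit was not set
  // ... use the values to drive the reader
}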
From source file:co.cask.hydrator.plugin.batch.source.FileBatchSource.java
License:Apache License
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  // SimpleDateFormat needs to be local because it is not threadsafe
  SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd-HH");

  // calculate date one hour ago, rounded down to the nearest hour
  prevHour = new Date(context.getLogicalStartTime() - TimeUnit.HOURS.toMillis(1));
  Calendar cal = Calendar.getInstance();
  cal.setTime(prevHour);
  cal.set(Calendar.MINUTE, 0);
  cal.set(Calendar.SECOND, 0);
  cal.set(Calendar.MILLISECOND, 0);
  prevHour = cal.getTime();

  Job job = JobUtils.createInstance();
  Configuration conf = job.getConfiguration();

  Map<String, String> properties = GSON.fromJson(config.fileSystemProperties, MAP_STRING_STRING_TYPE);
  //noinspection ConstantConditions
  for (Map.Entry<String, String> entry : properties.entrySet()) {
    conf.set(entry.getKey(), entry.getValue());
  }

  conf.set(INPUT_REGEX_CONFIG, config.fileRegex);
  conf.set(INPUT_NAME_CONFIG, config.path);

  if (config.timeTable != null) {
    table = context.getDataset(config.timeTable);
    datesToRead = Bytes.toString(table.read(LAST_TIME_READ));
    if (datesToRead == null) {
      List<Date> firstRun = Lists.newArrayList(new Date(0));
      datesToRead = GSON.toJson(firstRun, ARRAYLIST_DATE_TYPE);
    }
    List<Date> attempted = Lists.newArrayList(prevHour);
    String updatedDatesToRead = GSON.toJson(attempted, ARRAYLIST_DATE_TYPE);
    if (!updatedDatesToRead.equals(datesToRead)) {
      table.write(LAST_TIME_READ, updatedDatesToRead);
    }
    conf.set(LAST_TIME_READ, datesToRead);
  }

  conf.set(CUTOFF_READ_TIME, dateFormat.format(prevHour));
  FileInputFormat.setInputPathFilter(job, BatchFileFilter.class);
  FileInputFormat.addInputPath(job, new Path(config.path));
  if (config.maxSplitSize != null) {
    FileInputFormat.setMaxInputSplitSize(job, config.maxSplitSize);
  }
  context.setInput(Input.of(config.referenceName, new SourceInputFormatProvider(config.inputFormatClass, conf)));
}
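The loop that copies the decoded fileSystemProperties map into conf is a reusable idiom. A minimal, self-contained helper for it; the class and method names here are illustrative, not part of the plugin.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;

public final class ConfigurationMaps {
  private ConfigurationMaps() { }

  // Copy every entry of a plain Map into a Hadoop Configuration via Configuration.set.
  public static void copyInto(Map<String, String> properties, Configuration conf) {
    for (Map.Entry<String, String> entry : properties.entrySet()) {
      conf.set(entry.getKey(), entry.getValue());
    }
  }
}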
From source file:co.cask.hydrator.plugin.batch.source.HiveBatchSource.java
License:Apache License
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  // This line is to load VersionInfo class here to make it available in the HCatInputFormat.setInput call. This is
  // needed to support CDAP 3.2 where we were just exposing the classes of the plugin jar and not the resources.
  LOG.trace("Hadoop version: {}", VersionInfo.getVersion());
  Job job = JobUtils.createInstance();
  Configuration conf = job.getConfiguration();
  conf.set(HiveConf.ConfVars.METASTOREURIS.varname, config.metaStoreURI);

  if (UserGroupInformation.isSecurityEnabled()) {
    conf.set(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL.varname, "true");
    conf.set("hive.metastore.token.signature", HiveAuthFactory.HS2_CLIENT_TOKEN);
  }

  // Use the current thread's classloader to ensure that when setInput is called it can access VersionInfo class
  // loaded above. This is needed to support CDAP 3.2 where we were just exposing classes to plugin jars and not
  // resources.
  ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
  try {
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    HCatInputFormat.setInput(conf, config.dbName, config.tableName, config.partitions);
  } finally {
    Thread.currentThread().setContextClassLoader(classLoader);
  }

  HCatSchema hCatSchema = HCatInputFormat.getTableSchema(conf);
  if (config.schema != null) {
    // if the user provided a schema then we should use that schema to read the table. This will allow user to
    // drop non-primitive types and read the table.
    hCatSchema = HiveSchemaConverter.toHiveSchema(Schema.parseJson(config.schema), hCatSchema);
    HCatInputFormat.setOutputSchema(job, hCatSchema);
  }
  HiveSchemaStore.storeHiveSchema(context, config.dbName, config.tableName, hCatSchema);
  context.setInput(Input.of(config.referenceName, new SourceInputFormatProvider(HCatInputFormat.class, conf)));
}
From source file:co.cask.hydrator.plugin.batch.source.XMLReaderBatchSource.java
License:Apache License
@Override
public void prepareRun(BatchSourceContext context) throws Exception {
  Job job = JobUtils.createInstance();
  Configuration conf = job.getConfiguration();
  conf.set(XMLInputFormat.XML_INPUTFORMAT_PATH_NAME, config.path);
  conf.set(XMLInputFormat.XML_INPUTFORMAT_NODE_PATH, config.nodePath);
  if (StringUtils.isNotEmpty(config.pattern)) {
    conf.set(XMLInputFormat.XML_INPUTFORMAT_PATTERN, config.pattern);
  }
  conf.set(XMLInputFormat.XML_INPUTFORMAT_FILE_ACTION, config.actionAfterProcess);
  if (StringUtils.isNotEmpty(config.targetFolder)) {
    conf.set(XMLInputFormat.XML_INPUTFORMAT_TARGET_FOLDER, config.targetFolder);
  }

  setFileTrackingInfo(context, conf);

  // Create a temporary directory, in which XMLRecordReader will add file tracking information.
  fileSystem = FileSystem.get(conf);
  long startTime = context.getLogicalStartTime();
  // Create temp file name using start time to make it unique.
  String tempDirectory = config.tableName + startTime;
  tempDirectoryPath = new Path(config.temporaryFolder, tempDirectory);
  fileSystem.mkdirs(tempDirectoryPath);
  fileSystem.deleteOnExit(tempDirectoryPath);
  conf.set(XMLInputFormat.XML_INPUTFORMAT_PROCESSED_DATA_TEMP_FOLDER, tempDirectoryPath.toUri().toString());

  XMLInputFormat.setInputPathFilter(job, BatchXMLFileFilter.class);
  XMLInputFormat.addInputPath(job, new Path(config.path));
  context.setInput(Input.of(config.referenceName, new SourceInputFormatProvider(XMLInputFormat.class, conf)));
}
From source file:co.cask.hydrator.plugin.batch.source.XMLReaderBatchSource.java
License:Apache License
/**
 * Sets file tracking information into the configuration.
 */
private void setFileTrackingInfo(BatchSourceContext context, Configuration conf) {
  // When reprocessing is not required, put the already-processed file names into the configuration.
  processedFileTrackingTable = context.getDataset(config.tableName);
  if (processedFileTrackingTable != null && !config.isReprocessingRequired()) {
    List<String> processedFiles = new ArrayList<String>();
    Calendar cal = Calendar.getInstance();
    cal.add(Calendar.DATE, -Integer.valueOf(config.tableExpiryPeriod));
    Date expiryDate = cal.getTime();
    try (CloseableIterator<KeyValue<byte[], byte[]>> iterator = processedFileTrackingTable.scan(null, null)) {
      while (iterator.hasNext()) {
        KeyValue<byte[], byte[]> keyValue = iterator.next();
        // Delete records processed before the expiry time period; keep the rest.
        Long time = Bytes.toLong(keyValue.getValue());
        Date processedDate = new Date(time);
        if (processedDate.before(expiryDate)) {
          processedFileTrackingTable.delete(keyValue.getKey());
        } else {
          processedFiles.add(Bytes.toString(keyValue.getKey()));
        }
      }
    }
    // File names used by BatchXMLFileFilter to filter out already-processed files.
    conf.set(XMLInputFormat.XML_INPUTFORMAT_PROCESSED_FILES,
             GSON.toJson(processedFiles, ARRAYLIST_PREPROCESSED_FILES));
  }
}
From source file:co.cask.hydrator.plugin.batchSource.KafkaInputFormat.java
License:Apache License
public static List<KafkaRequest> saveKafkaRequests(Configuration conf, String topic, Map<String, Integer> brokers,
                                                   Set<Integer> partitions, Map<TopicAndPartition, Long> initOffsets,
                                                   KeyValueTable table) throws Exception {
  ArrayList<KafkaRequest> finalRequests;
  HashMap<LeaderInfo, ArrayList<TopicAndPartition>> offsetRequestInfo = new HashMap<>();

  // Get Metadata for all topics
  List<TopicMetadata> topicMetadataList = getKafkaMetadata(brokers, topic);
  for (TopicMetadata topicMetadata : topicMetadataList) {
    for (PartitionMetadata partitionMetadata : topicMetadata.partitionsMetadata()) {
      LeaderInfo leader = new LeaderInfo(new URI("tcp://" + partitionMetadata.leader().connectionString()),
                                         partitionMetadata.leader().id());
      if (partitions.isEmpty() || partitions.contains(partitionMetadata.partitionId())) {
        if (offsetRequestInfo.containsKey(leader)) {
          ArrayList<TopicAndPartition> topicAndPartitions = offsetRequestInfo.get(leader);
          topicAndPartitions.add(new TopicAndPartition(topicMetadata.topic(), partitionMetadata.partitionId()));
          offsetRequestInfo.put(leader, topicAndPartitions);
        } else {
          ArrayList<TopicAndPartition> topicAndPartitions = new ArrayList<>();
          topicAndPartitions.add(new TopicAndPartition(topicMetadata.topic(), partitionMetadata.partitionId()));
          offsetRequestInfo.put(leader, topicAndPartitions);
        }
      }
    }
  }

  // Get the latest offsets and generate the KafkaRequests
  finalRequests = fetchLatestOffsetAndCreateKafkaRequests(offsetRequestInfo, initOffsets, table);

  Collections.sort(finalRequests, new Comparator<KafkaRequest>() {
    @Override
    public int compare(KafkaRequest r1, KafkaRequest r2) {
      return r1.getTopic().compareTo(r2.getTopic());
    }
  });

  Map<KafkaRequest, KafkaKey> offsetKeys = new HashMap<>();
  for (KafkaRequest request : finalRequests) {
    KafkaKey key = offsetKeys.get(request);
    if (key != null) {
      request.setOffset(key.getOffset());
      request.setAvgMsgSize(key.getMessageSize());
    }
    if (request.getEarliestOffset() > request.getOffset() || request.getOffset() > request.getLastOffset()) {
      boolean offsetUnset = request.getOffset() == KafkaRequest.DEFAULT_OFFSET;
      // When the offset is unset, it means it's a new topic/partition, we also need to consume the earliest offset
      if (offsetUnset) {
        request.setOffset(request.getEarliestOffset());
        offsetKeys.put(request,
                       new KafkaKey(request.getTopic(), request.getLeaderId(), request.getPartition(), 0,
                                    request.getOffset()));
      }
    }
  }

  conf.set(KAFKA_REQUEST, new Gson().toJson(finalRequests));
  return finalRequests;
}
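The final conf.set call stores the request list as JSON, a common way to pass structured data through a Configuration. Below is a hedged sketch of the matching read side; the method name and its placement are assumptions, and it presumes the KAFKA_REQUEST constant is visible to the caller.

import java.util.List;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.hadoop.conf.Configuration;

// Sketch only: recover the KafkaRequest list written by saveKafkaRequests, e.g. when computing splits.
static List<KafkaRequest> readKafkaRequests(Configuration conf) {
  String json = conf.get(KAFKA_REQUEST);
  return new Gson().fromJson(json, new TypeToken<List<KafkaRequest>>() { }.getType());
}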