List of usage examples for java.util.TimerTask: the TimerTask() constructor
protected TimerTask()
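All of the examples below follow the same basic pattern: because TimerTask's only constructor is the protected no-argument one shown above, client code instantiates it through a subclass (usually an anonymous one) that overrides run(), then hands the instance to a java.util.Timer. Here is a minimal, self-contained sketch of that pattern; the class name and the delay/period values are illustrative only, not taken from any of the projects below.

import java.util.Timer;
import java.util.TimerTask;

public class TimerTaskExample {

    public static void main(String[] args) {
        // TimerTask() is protected, so the constructor is only invoked implicitly
        // when a subclass (here an anonymous one) is instantiated.
        TimerTask task = new TimerTask() {
            @Override
            public void run() {
                System.out.println("task fired at " + System.currentTimeMillis());
            }
        };

        // Non-daemon timer thread named "example-timer"; first run after 2 s,
        // then every 4 s, the same shape as the schedule(task, delay, period)
        // calls used in the examples below.
        Timer timer = new Timer("example-timer", false);
        timer.schedule(task, 2000L, 4000L);
    }
}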
From source file:com.linkedin.pinot.core.data.manager.realtime.RealtimeSegmentDataManager.java
public RealtimeSegmentDataManager(final RealtimeSegmentZKMetadata segmentMetadata,
    final AbstractTableConfig tableConfig, InstanceZKMetadata instanceMetadata,
    RealtimeTableDataManager realtimeResourceManager, final String resourceDataDir, final ReadMode mode,
    final Schema schema, final ServerMetrics serverMetrics) throws Exception {
  super();
  this.schema = schema;
  this.extractor = (PlainFieldExtractor) FieldExtractorFactory.getPlainFieldExtractor(schema);
  this.serverMetrics = serverMetrics;
  this.segmentName = segmentMetadata.getSegmentName();
  this.tableName = tableConfig.getTableName();

  IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
  if (indexingConfig.getSortedColumn().isEmpty()) {
    LOGGER.info("RealtimeDataResourceZKMetadata contains no information about sorted column for segment {}",
        segmentName);
    this.sortedColumn = null;
  } else {
    String firstSortedColumn = indexingConfig.getSortedColumn().get(0);
    if (this.schema.hasColumn(firstSortedColumn)) {
      LOGGER.info("Setting sorted column name: {} from RealtimeDataResourceZKMetadata for segment {}",
          firstSortedColumn, segmentName);
      this.sortedColumn = firstSortedColumn;
    } else {
      LOGGER.warn(
          "Sorted column name: {} from RealtimeDataResourceZKMetadata is not existed in schema for segment {}.",
          firstSortedColumn, segmentName);
      this.sortedColumn = null;
    }
  }

  //inverted index columns
  invertedIndexColumns = indexingConfig.getInvertedIndexColumns();

  this.segmentMetatdaZk = segmentMetadata;

  // create and init stream provider config
  // TODO : ideally resourceMetatda should create and give back a streamProviderConfig
  this.kafkaStreamProviderConfig = new KafkaHighLevelStreamProviderConfig();
  this.kafkaStreamProviderConfig.init(tableConfig, instanceMetadata, schema);
  segmentLogger = LoggerFactory.getLogger(RealtimeSegmentDataManager.class.getName() + "_" + segmentName
      + "_" + kafkaStreamProviderConfig.getStreamName());
  segmentLogger.info("Created segment data manager with Sorted column:{}, invertedIndexColumns:{}",
      sortedColumn, invertedIndexColumns);
  segmentEndTimeThreshold = start + kafkaStreamProviderConfig.getTimeThresholdToFlushSegment();

  this.resourceDir = new File(resourceDataDir);
  this.resourceTmpDir = new File(resourceDataDir, "_tmp");
  if (!resourceTmpDir.exists()) {
    resourceTmpDir.mkdirs();
  }

  // create and init stream provider
  final String tableName = tableConfig.getTableName();
  this.kafkaStreamProvider = StreamProviderFactory.buildStreamProvider();
  this.kafkaStreamProvider.init(kafkaStreamProviderConfig, tableName, serverMetrics);
  this.kafkaStreamProvider.start();
  this.tableStreamName = tableName + "_" + kafkaStreamProviderConfig.getStreamName();

  // lets create a new realtime segment
  segmentLogger.info("Started kafka stream provider");
  realtimeSegment = new RealtimeSegmentImpl(schema, kafkaStreamProviderConfig.getSizeThresholdToFlushSegment(),
      tableName, segmentMetadata.getSegmentName(), kafkaStreamProviderConfig.getStreamName(), serverMetrics);
  realtimeSegment.setSegmentMetadata(segmentMetadata, this.schema);
  notifier = realtimeResourceManager;

  segmentStatusTask = new TimerTask() {
    @Override
    public void run() {
      computeKeepIndexing();
    }
  };

  // start the indexing thread
  indexingThread = new Thread(new Runnable() {
    @Override
    public void run() {
      // continue indexing until criteria is met
      boolean notFull = true;
      long exceptionSleepMillis = 50L;
      segmentLogger.info("Starting to collect rows");

      do {
        GenericRow row = null;
        try {
          row = kafkaStreamProvider.next();
          row = extractor.transform(row);
          if (row != null) {
            notFull = realtimeSegment.index(row);
            exceptionSleepMillis = 50L;
          }
        } catch (Exception e) {
          segmentLogger.warn("Caught exception while indexing row, sleeping for {} ms, row contents {}",
              exceptionSleepMillis, row, e);
          // Sleep for a short time as to avoid filling the logs with exceptions too quickly
          Uninterruptibles.sleepUninterruptibly(exceptionSleepMillis, TimeUnit.MILLISECONDS);
          exceptionSleepMillis = Math.min(60000L, exceptionSleepMillis * 2);
        } catch (Error e) {
          segmentLogger.error("Caught error in indexing thread", e);
          throw e;
        }
      } while (notFull && keepIndexing && (!isShuttingDown));

      if (isShuttingDown) {
        segmentLogger.info("Shutting down indexing thread!");
        return;
      }

      try {
        int numErrors, numConversions, numNulls, numNullCols;
        if ((numErrors = extractor.getTotalErrors()) > 0) {
          serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_WITH_ERRORS, (long) numErrors);
        }
        Map<String, Integer> errorCount = extractor.getError_count();
        for (String column : errorCount.keySet()) {
          if ((numErrors = errorCount.get(column)) > 0) {
            segmentLogger.warn("Column {} had {} rows with errors", column, numErrors);
          }
        }
        if ((numConversions = extractor.getTotalConversions()) > 0) {
          serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_NEEDING_CONVERSIONS,
              (long) numConversions);
          segmentLogger.info("{} rows needed conversions ", numConversions);
        }
        if ((numNulls = extractor.getTotalNulls()) > 0) {
          serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_WITH_NULL_VALUES,
              (long) numNulls);
          segmentLogger.info("{} rows had null columns", numNulls);
        }
        if ((numNullCols = extractor.getTotalNullCols()) > 0) {
          serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.COLUMNS_WITH_NULL_VALUES,
              (long) numNullCols);
          segmentLogger.info("{} columns had null values", numNullCols);
        }

        segmentLogger.info("Indexing threshold reached, proceeding with index conversion");
        // kill the timer first
        segmentStatusTask.cancel();
        updateCurrentDocumentCountMetrics();
        segmentLogger.info("Indexed {} raw events, current number of docs = {}",
            realtimeSegment.getRawDocumentCount(), realtimeSegment.getSegmentMetadata().getTotalDocs());

        File tempSegmentFolder = new File(resourceTmpDir, "tmp-" + String.valueOf(System.currentTimeMillis()));

        // lets convert the segment now
        RealtimeSegmentConverter converter = new RealtimeSegmentConverter(realtimeSegment,
            tempSegmentFolder.getAbsolutePath(), schema, segmentMetadata.getTableName(),
            segmentMetadata.getSegmentName(), sortedColumn, invertedIndexColumns);

        segmentLogger.info("Trying to build segment");
        final long buildStartTime = System.nanoTime();
        converter.build();
        final long buildEndTime = System.nanoTime();
        segmentLogger.info("Built segment in {} ms",
            TimeUnit.MILLISECONDS.convert((buildEndTime - buildStartTime), TimeUnit.NANOSECONDS));

        File destDir = new File(resourceDataDir, segmentMetadata.getSegmentName());
        FileUtils.deleteQuietly(destDir);
        FileUtils.moveDirectory(tempSegmentFolder.listFiles()[0], destDir);
        FileUtils.deleteQuietly(tempSegmentFolder);

        long segStartTime = realtimeSegment.getMinTime();
        long segEndTime = realtimeSegment.getMaxTime();

        TimeUnit timeUnit = schema.getTimeFieldSpec().getOutgoingGranularitySpec().getTimeType();
        Configuration configuration = new PropertyListConfiguration();
        configuration.setProperty(IndexLoadingConfigMetadata.KEY_OF_LOADING_INVERTED_INDEX,
            invertedIndexColumns);
        IndexLoadingConfigMetadata configMetadata = new IndexLoadingConfigMetadata(configuration);
        IndexSegment segment = Loaders.IndexSegment
            .load(new File(resourceDir, segmentMetatdaZk.getSegmentName()), mode, configMetadata);

        segmentLogger.info("Committing Kafka offsets");
        boolean commitSuccessful = false;
        try {
          kafkaStreamProvider.commit();
          commitSuccessful = true;
          kafkaStreamProvider.shutdown();
          segmentLogger.info("Successfully committed Kafka offsets, consumer release requested.");
        } catch (Throwable e) {
          // If we got here, it means that either the commit or the shutdown failed. Considering that the
          // KafkaConsumerManager delays shutdown and only adds the consumer to be released in a deferred way, this
          // likely means that writing the Kafka offsets failed.
          //
          // The old logic (mark segment as done, then commit offsets and shutdown the consumer immediately) would die
          // in a terrible way, leaving the consumer open and causing us to only get half the records from that point
          // on. In this case, because we keep the consumer open for a little while, we should be okay if the
          // controller reassigns us a new segment before the consumer gets released. Hopefully by the next time that
          // we get to committing the offsets, the transient ZK failure that caused the write to fail will not
          // happen again and everything will be good.
          //
          // Several things can happen:
          // - The controller reassigns us a new segment before we release the consumer (KafkaConsumerManager will
          //   keep the consumer open for about a minute, which should be enough time for the controller to reassign
          //   us a new segment) and the next time we close the segment the offsets commit successfully; we're good.
          // - The controller reassigns us a new segment, but after we released the consumer (if the controller was
          //   down or there was a ZK failure on writing the Kafka offsets but not the Helix state). We lose whatever
          //   data was in this segment. Not good.
          // - The server crashes after this comment and before we mark the current segment as done; if the Kafka
          //   offsets didn't get written, then when the server restarts it'll start consuming the current segment
          //   from the previously committed offsets; we're good.
          // - The server crashes after this comment, the Kafka offsets were written but the segment wasn't marked as
          //   done in Helix, but we got a failure (or not) on the commit; we lose whatever data was in this segment
          //   if we restart the server (not good). If we manually mark the segment as done in Helix by editing the
          //   state in ZK, everything is good, we'll consume a new segment that starts from the correct offsets.
          //
          // This is still better than the previous logic, which would have these failure modes:
          // - Consumer was left open and the controller reassigned us a new segment; consume only half the events
          //   (because there are two consumers and Kafka will try to rebalance partitions between those two)
          // - We got a segment assigned to us before we got around to committing the offsets, reconsume the data that
          //   we got in this segment again, as we're starting consumption from the previously committed offset (eg.
          //   duplicate data).
          //
          // This is still not very satisfactory, which is why this part is due for a redesign.
          //
          // Assuming you got here because the realtime offset commit metric has fired, check the logs to determine
          // which of the above scenarios happened. If you're in one of the good scenarios, then there's nothing to
          // do. If you're not, then based on how critical it is to get those rows back, then your options are:
          // - Wipe the realtime table and reconsume everything (mark the replica as disabled so that clients don't
          //   see query results from partially consumed data, then re-enable it when this replica has caught up)
          // - Accept that those rows are gone in this replica and move on (they'll be replaced by good offline data
          //   soon anyway)
          // - If there's a replica that has consumed properly, you could shut it down, copy its segments onto this
          //   replica, assign a new consumer group id to this replica, rename the copied segments and edit their
          //   metadata to reflect the new consumer group id, copy the Kafka offsets from the shutdown replica onto
          //   the new consumer group id and then restart both replicas. This should get you the missing rows.
          segmentLogger.error("FATAL: Exception committing or shutting down consumer commitSuccessful={}",
              commitSuccessful, e);
          serverMetrics.addMeteredTableValue(tableName, ServerMeter.REALTIME_OFFSET_COMMIT_EXCEPTIONS, 1L);
          if (!commitSuccessful) {
            kafkaStreamProvider.shutdown();
          }
        }

        try {
          segmentLogger.info("Marking current segment as completed in Helix");
          RealtimeSegmentZKMetadata metadataToOverwrite = new RealtimeSegmentZKMetadata();
          metadataToOverwrite.setTableName(segmentMetadata.getTableName());
          metadataToOverwrite.setSegmentName(segmentMetadata.getSegmentName());
          metadataToOverwrite.setSegmentType(SegmentType.OFFLINE);
          metadataToOverwrite.setStatus(Status.DONE);
          metadataToOverwrite.setStartTime(segStartTime);
          metadataToOverwrite.setEndTime(segEndTime);
          metadataToOverwrite.setTotalRawDocs(realtimeSegment.getSegmentMetadata().getTotalDocs());
          metadataToOverwrite.setTimeUnit(timeUnit);
          notifier.notifySegmentCommitted(metadataToOverwrite, segment);
          segmentLogger.info(
              "Completed write of segment completion to Helix, waiting for controller to assign a new segment");
        } catch (Exception e) {
          if (commitSuccessful) {
            segmentLogger.error(
                "Offsets were committed to Kafka but we were unable to mark this segment as completed in Helix. Manually mark the segment as completed in Helix; restarting this instance will result in data loss.",
                e);
          } else {
            segmentLogger.warn(
                "Caught exception while marking segment as completed in Helix. Offsets were not written, restarting the instance should be safe.",
                e);
          }
        }
      } catch (Exception e) {
        segmentLogger.error("Caught exception in the realtime indexing thread", e);
      }
    }
  });

  indexingThread.start();
  serverMetrics.addValueToTableGauge(tableName, ServerGauge.SEGMENT_COUNT, 1L);
  segmentLogger.debug("scheduling keepIndexing timer check");
  // start a schedule timer to keep track of the segment
  TimerService.timer.schedule(segmentStatusTask, ONE_MINUTE_IN_MILLSEC, ONE_MINUTE_IN_MILLSEC);
  segmentLogger.info("finished scheduling keepIndexing timer check");
}
From source file:io.hawkcd.agent.Agent.java
private void startCheckForWorkTimer() {
    TimerTask checkForWorkTask = new TimerTask() {
        @Override
        public void run() {
            Agent.this.checkForWork();
        }
    };

    this.checkForWorkTimer = new Timer();
    this.checkForWorkTimer.schedule(checkForWorkTask, 2000, 4000);
}
From source file:com.symbian.driver.plugins.ftptelnet.TelnetProcess.java
private static Timer startTelnetTimer(final int aTimeout) {
    Timer lTimeoutTimer = new Timer("Telnet client Timer", false);
    iStopReading = false;

    lTimeoutTimer.schedule(new TimerTask() {
        public void run() {
            iStopReading = true;
        }
    }, aTimeout);

    return lTimeoutTimer;
}
From source file:com.nokia.example.capturetheflag.network.SocketIONetworkClient.java
@Override
public void setConnectionIdle(boolean isIdle) {
    if (isIdle) {
        mIdleTask = new TimerTask() {
            @Override
            public void run() {
                if (mSocketClient != null) {
                    Log.d(TAG, "Idle, disconnecting...");
                    mSocketClient.disconnect();
                    mState = State.IDLE;
                    mReconnectAttempts = 0;
                }
            }
        };
        mTimer.schedule(mIdleTask, DISCONNECT_TIMEOUT);
    } else {
        if (mIdleTask != null) {
            boolean isCanceled = mIdleTask.cancel();
            if (!isCanceled) {
                if (mSocketClient != null) {
                    Log.d(TAG, "Not idle anymore, waking up...");
                    connect(mUrl, mPort);
                }
            }
        }
    }
}
From source file:org.ambientdynamix.core.HomeActivity.java
@Override
public void onCreate(Bundle savedInstanceState) {
    Log.v(TAG, "Activity State: onCreate()");
    super.onCreate(savedInstanceState);
    // Set our static reference
    activity = this;
    setContentView(R.layout.home_tab);

    updateCheck();
    points = new HashSet<>();
    mMixpanel = MixpanelAPI.getInstance(this, Constants.MIXPANEL_TOKEN);
    mMap = ((MapFragment) getFragmentManager().findFragmentById(R.id.map_main));
    // Check if we were successful in obtaining the map.
    if (mMap == null) {
        // check if google play service in the device is not available or out-dated.
        GooglePlayServicesUtil.isGooglePlayServicesAvailable(this);
        // nothing anymore, cuz android will take care of the rest (to remind user to update google play service).
    }

    // Construct the data source
    sensorMeasurements = new ArrayList<>();
    // Create the adapter to convert the array to views
    sensorMeasurementAdapter = new SensorMeasurementAdapter(this, sensorMeasurements);
    // Attach the adapter to a ListView
    final TwoWayView listView = (TwoWayView) findViewById(R.id.lvItems);
    listView.setOrientation(TwoWayView.Orientation.VERTICAL);
    listView.setPadding(0, 0, 0, 0);
    listView.setItemMargin(0);
    listView.setAdapter(sensorMeasurementAdapter);

    //Disable for now
    final Intent activityRecognitionIntent = new Intent(this, ActivityRecognitionService.class);
    activityRecognitionPendingIntent = PendingIntent.getService(getApplicationContext(), 0,
            activityRecognitionIntent, PendingIntent.FLAG_UPDATE_CURRENT);

    mGoogleApiClient = new GoogleApiClient.Builder(this).addApi(ActivityRecognition.API)
            .addConnectionCallbacks(this).addOnConnectionFailedListener(this).addApi(LocationServices.API)
            .build();
    mGoogleApiClient.connect();

    pendingSendButton = (Button) findViewById(R.id.send_pending_now);
    pendingSendButton.setOnClickListener(new View.OnClickListener() {
        @Override
        public void onClick(View v) {
            new AsyncReportOnServerTask().execute();
            try {
                final JSONObject props = new JSONObject();
                props.put("count", DynamixService.getDataStorageSize());
                //mMixpanel.track("send-stored-readings", props);
            } catch (JSONException ignore) {
            }
        }
    });

    // Setup an state refresh timer, which periodically updates application
    // state in the appList
    final Timer refresher = new Timer(true);
    final TimerTask t = new TimerTask() {
        @Override
        public void run() {
            refreshData();
        }
    };
    refresher.scheduleAtFixedRate(t, 0, 5000);

    phoneIdTv = (TextView) this.findViewById(R.id.deviceId_label);
    expDescriptionTv = (TextView) this.findViewById(R.id.experiment_description);

    if (mMap.getMap() != null) {
        mMap.getMap().setMyLocationEnabled(true);
        mMap.getMap().getUiSettings().setAllGesturesEnabled(false);
        mMap.getMap().getUiSettings().setMyLocationButtonEnabled(false);
    }
}
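The example above schedules its refresh task with Timer.scheduleAtFixedRate rather than Timer.schedule, which most of the other examples on this page use. Both methods exist on java.util.Timer; the short sketch below (class and message names are made up for illustration) contrasts the two repetition modes.

import java.util.Timer;
import java.util.TimerTask;

public class RepeatModes {

    public static void main(String[] args) throws InterruptedException {
        Timer timer = new Timer(true); // daemon timer, like the refresher above

        // Fixed-delay execution: each run is scheduled relative to when the
        // previous run actually executed, so any delay pushes later runs back.
        timer.schedule(new TimerTask() {
            @Override
            public void run() {
                System.out.println("fixed-delay tick");
            }
        }, 0L, 5000L);

        // Fixed-rate execution: each run is scheduled relative to the initial
        // scheduled time, so the timer fires extra runs to catch up after a delay.
        timer.scheduleAtFixedRate(new TimerTask() {
            @Override
            public void run() {
                System.out.println("fixed-rate tick");
            }
        }, 0L, 5000L);

        // Keep the main thread alive long enough to observe a few ticks,
        // since a daemon timer does not prevent JVM exit.
        Thread.sleep(12000L);
    }
}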
From source file:com.jbrisbin.vcloud.cache.RabbitMQAsyncCacheProvider.java
@Override
public void remove(final String id, long delay) {
    delayTimer.schedule(new TimerTask() {
        @Override
        public void run() {
            remove(id);
        }
    }, delay);
}
From source file:eu.project.ttc.engines.morpho.CompostAE.java
@Override
public void collectionProcessComplete() throws AnalysisEngineProcessException {
    SubTaskObserver observer = observerResource.getTaskObserver(TASK_NAME);
    observer.setTotalTaskWork(termIndexResource.getTermIndex().getWords().size());
    LOGGER.info("Starting morphologyical compound detection for TermIndex {}",
            this.termIndexResource.getTermIndex().getName());
    LOGGER.debug(this.toString());
    wrMeasure = termIndexResource.getTermIndex().getWRMeasure();
    swtLemmaIndex = termIndexResource.getTermIndex().getCustomIndex(TermIndexes.SINGLE_WORD_LEMMA);
    buildCompostIndex();

    final MutableLong cnt = new MutableLong(0);

    Timer progressLoggerTimer = new Timer("Morphosyntactic splitter AE");
    progressLoggerTimer.schedule(new TimerTask() {
        @Override
        public void run() {
            int total = termIndexResource.getTermIndex().getWords().size();
            CompostAE.LOGGER.info("Progress: {}% ({} on {})",
                    String.format("%.2f", ((float) cnt.longValue() * 100) / total), cnt.longValue(), total);
        }
    }, 5000l, 5000l);

    int observingStep = 100;
    for (Term swt : termIndexResource.getTermIndex().getTerms()) {
        if (!swt.isSingleWord())
            continue;
        cnt.increment();
        if (cnt.longValue() % observingStep == 0) {
            observer.work(observingStep);
        }

        /*
         * Do not do native morphology splitting
         * if a composition already exists.
         */
        Word word = swt.getWords().get(0).getWord();
        if (word.isCompound())
            continue;

        Map<Segmentation, Double> scores = computeScores(word.getLemma());
        if (scores.size() > 0) {
            List<Segmentation> segmentations = Lists.newArrayList(scores.keySet());

            /*
             * compare segmentations in a deterministic way.
             */
            segmentations.sort(new Comparator<Segmentation>() {
                @Override
                public int compare(Segmentation o1, Segmentation o2) {
                    int comp = Double.compare(scores.get(o2), scores.get(o1));
                    if (comp != 0)
                        return comp;
                    comp = Integer.compare(o1.getSegments().size(), o2.getSegments().size());
                    if (comp != 0)
                        return comp;
                    for (int i = 0; i < o1.getSegments().size(); i++) {
                        comp = Integer.compare(o2.getSegments().get(i).getEnd(), o1.getSegments().get(i).getEnd());
                        if (comp != 0)
                            return comp;
                    }
                    return 0;
                }
            });

            Segmentation bestSegmentation = segmentations.get(0);

            // build the word component from segmentation
            WordBuilder builder = new WordBuilder(word);

            for (Segment seg : bestSegmentation.getSegments()) {
                String lemma = segmentLemmaCache.getUnchecked(seg.getLemma());
                builder.addComponent(seg.getBegin(), seg.getEnd(), lemma);
                if (seg.isNeoclassical())
                    builder.setCompoundType(CompoundType.NEOCLASSICAL);
                else
                    builder.setCompoundType(CompoundType.NATIVE);
            }
            builder.create();

            // log the word composition
            if (LOGGER.isTraceEnabled()) {
                List<String> componentStrings = Lists.newArrayList();
                for (Component component : word.getComponents())
                    componentStrings.add(component.toString());
                LOGGER.trace("{} [{}]", word.getLemma(), Joiner.on(' ').join(componentStrings));
            }
        }
    }

    //finalize
    progressLoggerTimer.cancel();
    LOGGER.debug("segment score cache size: {}", segmentScoreEntries.size());
    LOGGER.debug("segment score hit count: " + segmentScoreEntries.stats().hitCount());
    LOGGER.debug("segment score hit rate: " + segmentScoreEntries.stats().hitRate());
    LOGGER.debug("segment score eviction count: " + segmentScoreEntries.stats().evictionCount());
    termIndexResource.getTermIndex().dropCustomIndex(TermIndexes.SINGLE_WORD_LEMMA);
    segmentScoreEntries.invalidateAll();
    segmentLemmaCache.invalidateAll();
}
From source file:br.com.devfest.norte.wear.PagerActivity.java
/**
 * Renews the timer that causes pages to flip upon providing an answer.
 */
private void renewTimer(final int targetPage) {
    clearTimer();
    mTimerTask = new TimerTask() {
        @Override
        public void run() {
            mHandler.post(new Runnable() {
                @Override
                public void run() {
                    mViewPager.setCurrentItem(targetPage);
                }
            });
        }
    };
    mTimer = new Timer();
    mTimer.schedule(mTimerTask, PAGE_FLIP_DELAY_MS);
}
From source file:com.android.server.MaybeService.java
private void initializeTimerTask() {
    mDataDownloadTimer = new Timer();
    mDataDownloaderTask = new TimerTask() {
        public void run() {
            Log.v(TAG, "Querying server");
            if (mIsDeviceRegistered) {
                synchronized (sDownloadLock) {
                    String deviceMeid = getDeviceMEID();
                    if (deviceMeid == null) {
                        // TelephonyManager not initialized
                        return;
                    }
                    String serverUrl = URL + "/" + deviceMeid;
                    new JSONDownloaderTask().execute(serverUrl);
                    try {
                        sDownloadLock.wait();
                        parseData(mJSONDownloadData);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                } //end sync sDownloadLock
            } else {
                synchronized (sNetworkCallLock) {
                    new DeviceRegisterTask().execute(URL);
                    try {
                        sNetworkCallLock.wait();
                        parseData(mJSONResponse);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                    Log.d(TAG, "Received data from TASK: " + mJSONResponse);
                } //end sync sNetworkCallLock
            }
        }
    };
    mDataDownloadTimer.schedule(mDataDownloaderTask, 30000, (mPollInterval * 1000));
}
From source file:com.linkedin.pinot.core.data.manager.realtime.HLRealtimeSegmentDataManager.java
public HLRealtimeSegmentDataManager(final RealtimeSegmentZKMetadata segmentMetadata,
    final AbstractTableConfig tableConfig, InstanceZKMetadata instanceMetadata,
    final RealtimeTableDataManager realtimeTableDataManager, final String resourceDataDir,
    final ReadMode mode, final Schema schema, final ServerMetrics serverMetrics) throws Exception {
  super();
  _realtimeTableDataManager = realtimeTableDataManager;
  final String segmentVersionStr = tableConfig.getIndexingConfig().getSegmentFormatVersion();
  _segmentVersion = SegmentVersion.fromStringOrDefault(segmentVersionStr);
  this.schema = schema;
  this.extractor = (PlainFieldExtractor) FieldExtractorFactory.getPlainFieldExtractor(schema);
  this.serverMetrics = serverMetrics;
  this.segmentName = segmentMetadata.getSegmentName();
  this.tableName = tableConfig.getTableName();

  IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
  if (indexingConfig.getSortedColumn().isEmpty()) {
    LOGGER.info("RealtimeDataResourceZKMetadata contains no information about sorted column for segment {}",
        segmentName);
    this.sortedColumn = null;
  } else {
    String firstSortedColumn = indexingConfig.getSortedColumn().get(0);
    if (this.schema.hasColumn(firstSortedColumn)) {
      LOGGER.info("Setting sorted column name: {} from RealtimeDataResourceZKMetadata for segment {}",
          firstSortedColumn, segmentName);
      this.sortedColumn = firstSortedColumn;
    } else {
      LOGGER.warn(
          "Sorted column name: {} from RealtimeDataResourceZKMetadata is not existed in schema for segment {}.",
          firstSortedColumn, segmentName);
      this.sortedColumn = null;
    }
  }

  //inverted index columns
  invertedIndexColumns = indexingConfig.getInvertedIndexColumns();
  if (sortedColumn != null && !invertedIndexColumns.contains(sortedColumn)) {
    invertedIndexColumns.add(sortedColumn);
  }

  this.segmentMetatdaZk = segmentMetadata;

  // create and init stream provider config
  // TODO : ideally resourceMetatda should create and give back a streamProviderConfig
  this.kafkaStreamProviderConfig = new KafkaHighLevelStreamProviderConfig();
  this.kafkaStreamProviderConfig.init(tableConfig, instanceMetadata, schema);
  segmentLogger = LoggerFactory.getLogger(HLRealtimeSegmentDataManager.class.getName() + "_" + segmentName
      + "_" + kafkaStreamProviderConfig.getStreamName());
  segmentLogger.info("Created segment data manager with Sorted column:{}, invertedIndexColumns:{}",
      sortedColumn, invertedIndexColumns);
  segmentEndTimeThreshold = start + kafkaStreamProviderConfig.getTimeThresholdToFlushSegment();

  this.resourceDir = new File(resourceDataDir);
  this.resourceTmpDir = new File(resourceDataDir, "_tmp");
  if (!resourceTmpDir.exists()) {
    resourceTmpDir.mkdirs();
  }

  // create and init stream provider
  final String tableName = tableConfig.getTableName();
  this.kafkaStreamProvider = StreamProviderFactory.buildStreamProvider();
  this.kafkaStreamProvider.init(kafkaStreamProviderConfig, tableName, serverMetrics);
  this.kafkaStreamProvider.start();
  this.tableStreamName = tableName + "_" + kafkaStreamProviderConfig.getStreamName();

  // lets create a new realtime segment
  segmentLogger.info("Started kafka stream provider");
  realtimeSegment = new RealtimeSegmentImpl(schema, kafkaStreamProviderConfig.getSizeThresholdToFlushSegment(),
      tableName, segmentMetadata.getSegmentName(), kafkaStreamProviderConfig.getStreamName(), serverMetrics,
      invertedIndexColumns);
  realtimeSegment.setSegmentMetadata(segmentMetadata, this.schema);
  notifier = realtimeTableDataManager;

  segmentStatusTask = new TimerTask() {
    @Override
    public void run() {
      computeKeepIndexing();
    }
  };

  // start the indexing thread
  indexingThread = new Thread(new Runnable() {
    @Override
    public void run() {
      // continue indexing until criteria is met
      boolean notFull = true;
      long exceptionSleepMillis = 50L;
      segmentLogger.info("Starting to collect rows");

      do {
        GenericRow row = null;
        try {
          row = kafkaStreamProvider.next();
          if (row != null) {
            row = extractor.transform(row);
            notFull = realtimeSegment.index(row);
            exceptionSleepMillis = 50L;
          }
        } catch (Exception e) {
          segmentLogger.warn("Caught exception while indexing row, sleeping for {} ms, row contents {}",
              exceptionSleepMillis, row, e);
          // Sleep for a short time as to avoid filling the logs with exceptions too quickly
          Uninterruptibles.sleepUninterruptibly(exceptionSleepMillis, TimeUnit.MILLISECONDS);
          exceptionSleepMillis = Math.min(60000L, exceptionSleepMillis * 2);
        } catch (Error e) {
          segmentLogger.error("Caught error in indexing thread", e);
          throw e;
        }
      } while (notFull && keepIndexing && (!isShuttingDown));

      if (isShuttingDown) {
        segmentLogger.info("Shutting down indexing thread!");
        return;
      }

      try {
        int numErrors, numConversions, numNulls, numNullCols;
        if ((numErrors = extractor.getTotalErrors()) > 0) {
          serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_WITH_ERRORS, (long) numErrors);
        }
        Map<String, Integer> errorCount = extractor.getErrorCount();
        for (String column : errorCount.keySet()) {
          if ((numErrors = errorCount.get(column)) > 0) {
            segmentLogger.warn("Column {} had {} rows with errors", column, numErrors);
          }
        }
        if ((numConversions = extractor.getTotalConversions()) > 0) {
          serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_NEEDING_CONVERSIONS,
              (long) numConversions);
          segmentLogger.info("{} rows needed conversions ", numConversions);
        }
        if ((numNulls = extractor.getTotalNulls()) > 0) {
          serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_WITH_NULL_VALUES,
              (long) numNulls);
          segmentLogger.info("{} rows had null columns", numNulls);
        }
        if ((numNullCols = extractor.getTotalNullCols()) > 0) {
          serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.COLUMNS_WITH_NULL_VALUES,
              (long) numNullCols);
          segmentLogger.info("{} columns had null values", numNullCols);
        }

        segmentLogger.info("Indexing threshold reached, proceeding with index conversion");
        // kill the timer first
        segmentStatusTask.cancel();
        updateCurrentDocumentCountMetrics();
        segmentLogger.info("Indexed {} raw events, current number of docs = {}",
            realtimeSegment.getRawDocumentCount(), realtimeSegment.getSegmentMetadata().getTotalDocs());

        File tempSegmentFolder = new File(resourceTmpDir, "tmp-" + String.valueOf(System.currentTimeMillis()));

        // lets convert the segment now
        RealtimeSegmentConverter converter = new RealtimeSegmentConverter(realtimeSegment,
            tempSegmentFolder.getAbsolutePath(), schema, segmentMetadata.getTableName(),
            segmentMetadata.getSegmentName(), sortedColumn, invertedIndexColumns);

        segmentLogger.info("Trying to build segment");
        final long buildStartTime = System.nanoTime();
        converter.build(_segmentVersion);
        final long buildEndTime = System.nanoTime();
        segmentLogger.info("Built segment in {} ms",
            TimeUnit.MILLISECONDS.convert((buildEndTime - buildStartTime), TimeUnit.NANOSECONDS));

        File destDir = new File(resourceDataDir, segmentMetadata.getSegmentName());
        FileUtils.deleteQuietly(destDir);
        FileUtils.moveDirectory(tempSegmentFolder.listFiles()[0], destDir);
        FileUtils.deleteQuietly(tempSegmentFolder);

        long segStartTime = realtimeSegment.getMinTime();
        long segEndTime = realtimeSegment.getMaxTime();

        TimeUnit timeUnit = schema.getTimeFieldSpec().getOutgoingGranularitySpec().getTimeType();
        IndexSegment segment = Loaders.IndexSegment.load(
            new File(resourceDir, segmentMetatdaZk.getSegmentName()), mode,
            realtimeTableDataManager.getIndexLoadingConfigMetadata());

        segmentLogger.info("Committing Kafka offsets");
        boolean commitSuccessful = false;
        try {
          kafkaStreamProvider.commit();
          commitSuccessful = true;
          kafkaStreamProvider.shutdown();
          segmentLogger.info("Successfully committed Kafka offsets, consumer release requested.");
        } catch (Throwable e) {
          // If we got here, it means that either the commit or the shutdown failed. Considering that the
          // KafkaConsumerManager delays shutdown and only adds the consumer to be released in a deferred way, this
          // likely means that writing the Kafka offsets failed.
          //
          // The old logic (mark segment as done, then commit offsets and shutdown the consumer immediately) would die
          // in a terrible way, leaving the consumer open and causing us to only get half the records from that point
          // on. In this case, because we keep the consumer open for a little while, we should be okay if the
          // controller reassigns us a new segment before the consumer gets released. Hopefully by the next time that
          // we get to committing the offsets, the transient ZK failure that caused the write to fail will not
          // happen again and everything will be good.
          //
          // Several things can happen:
          // - The controller reassigns us a new segment before we release the consumer (KafkaConsumerManager will
          //   keep the consumer open for about a minute, which should be enough time for the controller to reassign
          //   us a new segment) and the next time we close the segment the offsets commit successfully; we're good.
          // - The controller reassigns us a new segment, but after we released the consumer (if the controller was
          //   down or there was a ZK failure on writing the Kafka offsets but not the Helix state). We lose whatever
          //   data was in this segment. Not good.
          // - The server crashes after this comment and before we mark the current segment as done; if the Kafka
          //   offsets didn't get written, then when the server restarts it'll start consuming the current segment
          //   from the previously committed offsets; we're good.
          // - The server crashes after this comment, the Kafka offsets were written but the segment wasn't marked as
          //   done in Helix, but we got a failure (or not) on the commit; we lose whatever data was in this segment
          //   if we restart the server (not good). If we manually mark the segment as done in Helix by editing the
          //   state in ZK, everything is good, we'll consume a new segment that starts from the correct offsets.
          //
          // This is still better than the previous logic, which would have these failure modes:
          // - Consumer was left open and the controller reassigned us a new segment; consume only half the events
          //   (because there are two consumers and Kafka will try to rebalance partitions between those two)
          // - We got a segment assigned to us before we got around to committing the offsets, reconsume the data that
          //   we got in this segment again, as we're starting consumption from the previously committed offset (eg.
          //   duplicate data).
          //
          // This is still not very satisfactory, which is why this part is due for a redesign.
          //
          // Assuming you got here because the realtime offset commit metric has fired, check the logs to determine
          // which of the above scenarios happened. If you're in one of the good scenarios, then there's nothing to
          // do. If you're not, then based on how critical it is to get those rows back, then your options are:
          // - Wipe the realtime table and reconsume everything (mark the replica as disabled so that clients don't
          //   see query results from partially consumed data, then re-enable it when this replica has caught up)
          // - Accept that those rows are gone in this replica and move on (they'll be replaced by good offline data
          //   soon anyway)
          // - If there's a replica that has consumed properly, you could shut it down, copy its segments onto this
          //   replica, assign a new consumer group id to this replica, rename the copied segments and edit their
          //   metadata to reflect the new consumer group id, copy the Kafka offsets from the shutdown replica onto
          //   the new consumer group id and then restart both replicas. This should get you the missing rows.
          segmentLogger.error("FATAL: Exception committing or shutting down consumer commitSuccessful={}",
              commitSuccessful, e);
          serverMetrics.addMeteredTableValue(tableName, ServerMeter.REALTIME_OFFSET_COMMIT_EXCEPTIONS, 1L);
          if (!commitSuccessful) {
            kafkaStreamProvider.shutdown();
          }
        }

        try {
          segmentLogger.info("Marking current segment as completed in Helix");
          RealtimeSegmentZKMetadata metadataToOverwrite = new RealtimeSegmentZKMetadata();
          metadataToOverwrite.setTableName(segmentMetadata.getTableName());
          metadataToOverwrite.setSegmentName(segmentMetadata.getSegmentName());
          metadataToOverwrite.setSegmentType(SegmentType.OFFLINE);
          metadataToOverwrite.setStatus(Status.DONE);
          metadataToOverwrite.setStartTime(segStartTime);
          metadataToOverwrite.setEndTime(segEndTime);
          metadataToOverwrite.setTotalRawDocs(realtimeSegment.getSegmentMetadata().getTotalDocs());
          metadataToOverwrite.setTimeUnit(timeUnit);
          notifier.notifySegmentCommitted(metadataToOverwrite, segment);
          segmentLogger.info(
              "Completed write of segment completion to Helix, waiting for controller to assign a new segment");
        } catch (Exception e) {
          if (commitSuccessful) {
            segmentLogger.error(
                "Offsets were committed to Kafka but we were unable to mark this segment as completed in Helix. Manually mark the segment as completed in Helix; restarting this instance will result in data loss.",
                e);
          } else {
            segmentLogger.warn(
                "Caught exception while marking segment as completed in Helix. Offsets were not written, restarting the instance should be safe.",
                e);
          }
        }
      } catch (Exception e) {
        segmentLogger.error("Caught exception in the realtime indexing thread", e);
      }
    }
  });

  indexingThread.start();
  serverMetrics.addValueToTableGauge(tableName, ServerGauge.SEGMENT_COUNT, 1L);
  segmentLogger.debug("scheduling keepIndexing timer check");
  // start a schedule timer to keep track of the segment
  TimerService.timer.schedule(segmentStatusTask, ONE_MINUTE_IN_MILLSEC, ONE_MINUTE_IN_MILLSEC);
  segmentLogger.info("finished scheduling keepIndexing timer check");
}