List of usage examples for java.io.File.compareTo
public int compareTo(File pathname)
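compareTo orders abstract pathnames lexicographically: it returns zero when the two pathnames are equal, a negative value when this pathname sorts before the argument, and a positive value otherwise. The ordering is platform-dependent (case-insensitive on Windows, case-sensitive on Unix). A minimal, self-contained sketch, using an illustrative directory path that is not taken from the examples below:

import java.io.File;
import java.util.Arrays;

public class FileCompareToExample {
    public static void main(String[] args) {
        // Illustrative directory; replace with a path that exists on your system.
        File dir = new File("/tmp/example");
        File[] entries = dir.listFiles();
        if (entries == null) {
            System.out.println("Not a readable directory: " + dir);
            return;
        }
        // File implements Comparable<File>, so a plain sort uses File.compareTo
        // and orders the entries lexicographically by abstract pathname.
        Arrays.sort(entries);
        for (File f : entries) {
            System.out.println(f.getPath());
        }

        // Direct comparison of two pathnames (the files need not exist).
        File a = new File("/tmp/example/a.txt");
        File b = new File("/tmp/example/b.txt");
        System.out.println(a.compareTo(b) < 0); // true: "a.txt" sorts before "b.txt"
    }
}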
From source file:org.apache.druid.segment.realtime.plumber.RealtimePlumber.java
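In this example, File.compareTo appears inside the Comparator that orders persisted segment directories: the comparator first tries to compare directory names as integers and only falls back to compareTo (lexicographic pathname order) when a name cannot be parsed as a number. A standalone sketch of this comparator pattern follows the example.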
protected Object bootstrapSinksFromDisk() { final VersioningPolicy versioningPolicy = config.getVersioningPolicy(); File baseDir = computeBaseDir(schema); if (baseDir == null || !baseDir.exists()) { return null; }//ww w . ja v a2 s . c o m File[] files = baseDir.listFiles(); if (files == null) { return null; } Object metadata = null; long latestCommitTime = 0; for (File sinkDir : files) { final Interval sinkInterval = Intervals.of(sinkDir.getName().replace("_", "/")); //final File[] sinkFiles = sinkDir.listFiles(); // To avoid reading and listing of "merged" dir final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String fileName) { return !(Ints.tryParse(fileName) == null); } }); Arrays.sort(sinkFiles, new Comparator<File>() { @Override public int compare(File o1, File o2) { try { return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())); } catch (NumberFormatException e) { log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2); return o1.compareTo(o2); } } }); boolean isCorrupted = false; List<FireHydrant> hydrants = Lists.newArrayList(); for (File segmentDir : sinkFiles) { log.info("Loading previously persisted segment at [%s]", segmentDir); // Although this has been tackled at start of this method. // Just a doubly-check added to skip "merged" dir. from being added to hydrants // If 100% sure that this is not needed, this check can be removed. if (Ints.tryParse(segmentDir.getName()) == null) { continue; } QueryableIndex queryableIndex = null; try { queryableIndex = indexIO.loadIndex(segmentDir); } catch (IOException e) { log.error(e, "Problem loading segmentDir from disk."); isCorrupted = true; } if (isCorrupted) { try { File corruptSegmentDir = computeCorruptedFileDumpDir(segmentDir, schema); log.info("Renaming %s to %s", segmentDir.getAbsolutePath(), corruptSegmentDir.getAbsolutePath()); FileUtils.copyDirectory(segmentDir, corruptSegmentDir); FileUtils.deleteDirectory(segmentDir); } catch (Exception e1) { log.error(e1, "Failed to rename %s", segmentDir.getAbsolutePath()); } //Note: skipping corrupted segment might lead to dropping some data. This strategy should be changed //at some point. 
continue; } Metadata segmentMetadata = queryableIndex.getMetadata(); if (segmentMetadata != null) { Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY); if (timestampObj != null) { long timestamp = ((Long) timestampObj).longValue(); if (timestamp > latestCommitTime) { log.info( "Found metaData [%s] with latestCommitTime [%s] greater than previous recorded [%s]", queryableIndex.getMetadata(), timestamp, latestCommitTime); latestCommitTime = timestamp; metadata = queryableIndex.getMetadata().get(COMMIT_METADATA_KEY); } } } hydrants.add( new FireHydrant(new QueryableIndexSegment( DataSegment.makeDataSegmentIdentifier(schema.getDataSource(), sinkInterval.getStart(), sinkInterval.getEnd(), versioningPolicy.getVersion(sinkInterval), config.getShardSpec()), queryableIndex), Integer.parseInt(segmentDir.getName()))); } if (hydrants.isEmpty()) { // Probably encountered a corrupt sink directory log.warn( "Found persisted segment directory with no intermediate segments present at %s, skipping sink creation.", sinkDir.getAbsolutePath()); continue; } final Sink currSink = new Sink(sinkInterval, schema, config.getShardSpec(), versioningPolicy.getVersion(sinkInterval), config.getMaxRowsInMemory(), TuningConfigs.getMaxBytesInMemoryOrDefault(config.getMaxBytesInMemory()), config.isReportParseExceptions(), config.getDedupColumn(), hydrants); addSink(currSink); } return metadata; }
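The sorting pattern above can be reduced to a standalone sketch. This is a minimal illustration, not Druid code: Guava's Ints.compare is replaced by Integer.compare, the directory paths are made up, and the non-numeric entries that Druid filters out with a FilenameFilter are simply omitted, so File.compareTo is only a defensive fallback here.

import java.io.File;
import java.util.Arrays;
import java.util.Comparator;

public class NumericDirNameSort {
    public static void main(String[] args) {
        // Made-up persist directories named by hydrant number.
        File[] segmentDirs = {
            new File("/tmp/persist/10"),
            new File("/tmp/persist/2"),
            new File("/tmp/persist/1"),
        };
        // Sort by the numeric value of the directory name; if a name is not a
        // number, fall back to File.compareTo (lexicographic pathname order).
        Arrays.sort(segmentDirs, new Comparator<File>() {
            @Override
            public int compare(File o1, File o2) {
                try {
                    return Integer.compare(Integer.parseInt(o1.getName()),
                            Integer.parseInt(o2.getName()));
                } catch (NumberFormatException e) {
                    return o1.compareTo(o2);
                }
            }
        });
        // Prints 1, 2, 10 in numeric order; a purely lexicographic sort would give 1, 10, 2.
        System.out.println(Arrays.toString(segmentDirs));
    }
}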
From source file:io.druid.segment.realtime.plumber.RealtimePlumber.java
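This io.druid variant of the same bootstrap method uses the identical comparator: numeric comparison of segment directory names, with File.compareTo as the fallback when a NumberFormatException is thrown.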
protected Object bootstrapSinksFromDisk() { final VersioningPolicy versioningPolicy = config.getVersioningPolicy(); File baseDir = computeBaseDir(schema); if (baseDir == null || !baseDir.exists()) { return null; }/* w w w . j ava 2 s . c o m*/ File[] files = baseDir.listFiles(); if (files == null) { return null; } Object metadata = null; long latestCommitTime = 0; for (File sinkDir : files) { Interval sinkInterval = new Interval(sinkDir.getName().replace("_", "/")); //final File[] sinkFiles = sinkDir.listFiles(); // To avoid reading and listing of "merged" dir final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String fileName) { return !(Ints.tryParse(fileName) == null); } }); Arrays.sort(sinkFiles, new Comparator<File>() { @Override public int compare(File o1, File o2) { try { return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())); } catch (NumberFormatException e) { log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2); return o1.compareTo(o2); } } }); boolean isCorrupted = false; try { List<FireHydrant> hydrants = Lists.newArrayList(); for (File segmentDir : sinkFiles) { log.info("Loading previously persisted segment at [%s]", segmentDir); // Although this has been tackled at start of this method. // Just a doubly-check added to skip "merged" dir. from being added to hydrants // If 100% sure that this is not needed, this check can be removed. if (Ints.tryParse(segmentDir.getName()) == null) { continue; } QueryableIndex queryableIndex = null; try { queryableIndex = IndexIO.loadIndex(segmentDir); } catch (IOException e) { log.error(e, "Problem loading segmentDir from disk."); isCorrupted = true; } if (isCorrupted) { try { File corruptSegmentDir = computeCorruptedFileDumpDir(segmentDir, schema); log.info("Renaming %s to %s", segmentDir.getAbsolutePath(), corruptSegmentDir.getAbsolutePath()); FileUtils.copyDirectory(segmentDir, corruptSegmentDir); FileUtils.deleteDirectory(segmentDir); } catch (Exception e1) { log.error(e1, "Failed to rename %s", segmentDir.getAbsolutePath()); } //Note: skipping corrupted segment might lead to dropping some data. This strategy should be changed //at some point. 
continue; } Map<String, Object> segmentMetadata = queryableIndex.getMetaData(); if (segmentMetadata != null) { Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY); if (timestampObj != null) { long timestamp = ((Long) timestampObj).longValue(); if (timestamp > latestCommitTime) { log.info( "Found metaData [%s] with latestCommitTime [%s] greater than previous recorded [%s]", queryableIndex.getMetaData(), timestamp, latestCommitTime); latestCommitTime = timestamp; metadata = queryableIndex.getMetaData().get(COMMIT_METADATA_KEY); } } } hydrants.add( new FireHydrant(new QueryableIndexSegment( DataSegment.makeDataSegmentIdentifier(schema.getDataSource(), sinkInterval.getStart(), sinkInterval.getEnd(), versioningPolicy.getVersion(sinkInterval), config.getShardSpec()), queryableIndex), Integer.parseInt(segmentDir.getName()))); } if (hydrants.isEmpty()) { // Probably encountered a corrupt sink directory log.warn( "Found persisted segment directory with no intermediate segments present at %s, skipping sink creation.", sinkDir.getAbsolutePath()); continue; } Sink currSink = new Sink(sinkInterval, schema, config, versioningPolicy.getVersion(sinkInterval), hydrants); sinks.put(sinkInterval.getStartMillis(), currSink); sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), new SingleElementPartitionChunk<Sink>(currSink)); segmentAnnouncer.announceSegment(currSink.getSegment()); } catch (IOException e) { log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()) .addData("interval", sinkInterval).emit(); } } return metadata; }
From source file:org.y20k.trackbook.helpers.StorageHelper.java
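Here the Comparator uses compareTo to sort Trackbook recordings: files that are not ".trackbook" tracks (including the temp file) are pushed to the end of the array, and the remaining track files are ordered by file2.compareTo(file1), i.e. in descending pathname order.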
private File[] sortFiles(File[] files) {
    // sort array
    LogHelper.v(LOG_TAG, "Sorting files.");
    Arrays.sort(files, new Comparator<File>() {
        @Override
        public int compare(File file1, File file2) {
            // discard temp file and files not ending with ".trackbook"
            boolean file1IsTrack = file1.getName().endsWith(FILE_TYPE_TRACKBOOK_EXTENSION)
                    && !file1.equals(mTempFile);
            boolean file2IsTrack = file2.getName().endsWith(FILE_TYPE_TRACKBOOK_EXTENSION)
                    && !file2.equals(mTempFile);

            // note: "greater" means higher index in array
            if (!file1IsTrack && file2IsTrack) {
                // file1 is not a track, file1 is greater
                return 1;
            } else if (!file2IsTrack && file1IsTrack) {
                // file2 is not a track, file2 is greater
                return -1;
            } else {
                // "compareTo" compares abstract path names lexicographically
                // 0 == equal | -1 == file2 less than file1 | 1 == file2 greater than file1
                return file2.compareTo(file1);
            }
        }
    });

    // log sorting result
    // TODO comment out for release
    // String fileList = "";
    // for (File file : files) {
    //     fileList = fileList + file.getName() + "\n";
    // }
    // LogHelper.v(LOG_TAG, "+++ List of files +++\n" + fileList);

    // hand back sorted array of files
    return files;
}
From source file:com.metamx.druid.realtime.plumber.RealtimePlumberSchool.java
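Inside the anonymous Plumber returned by findPlumber, bootstrapSinksFromDisk sorts persisted hydrant directories numerically by name and falls back to File.compareTo when a directory name is not a number.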
@Override public Plumber findPlumber(final Schema schema, final FireDepartmentMetrics metrics) { verifyState();/*from ww w . j av a2 s .c o m*/ final RejectionPolicy rejectionPolicy = rejectionPolicyFactory.create(windowPeriod); log.info("Creating plumber using rejectionPolicy[%s]", rejectionPolicy); return new Plumber() { private volatile boolean stopped = false; private volatile ExecutorService persistExecutor = null; private volatile ScheduledExecutorService scheduledExecutor = null; private final Map<Long, Sink> sinks = Maps.newConcurrentMap(); private final VersionedIntervalTimeline<String, Sink> sinkTimeline = new VersionedIntervalTimeline<String, Sink>( String.CASE_INSENSITIVE_ORDER); @Override public void startJob() { computeBaseDir(schema).mkdirs(); initializeExecutors(); bootstrapSinksFromDisk(); registerServerViewCallback(); startPersistThread(); } @Override public Sink getSink(long timestamp) { if (!rejectionPolicy.accept(timestamp)) { return null; } final long truncatedTime = segmentGranularity.truncate(timestamp); Sink retVal = sinks.get(truncatedTime); if (retVal == null) { final Interval sinkInterval = new Interval(new DateTime(truncatedTime), segmentGranularity.increment(new DateTime(truncatedTime))); retVal = new Sink(sinkInterval, schema, versioningPolicy.getVersion(sinkInterval)); try { segmentAnnouncer.announceSegment(retVal.getSegment()); sinks.put(truncatedTime, retVal); sinkTimeline.add(retVal.getInterval(), retVal.getVersion(), new SingleElementPartitionChunk<Sink>(retVal)); } catch (IOException e) { log.makeAlert(e, "Failed to announce new segment[%s]", schema.getDataSource()) .addData("interval", retVal.getInterval()).emit(); } } return retVal; } @Override public <T> QueryRunner<T> getQueryRunner(final Query<T> query) { final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query); final QueryToolChest<T, Query<T>> toolchest = factory.getToolchest(); final Function<Query<T>, ServiceMetricEvent.Builder> builderFn = new Function<Query<T>, ServiceMetricEvent.Builder>() { @Override public ServiceMetricEvent.Builder apply(@Nullable Query<T> input) { return toolchest.makeMetricBuilder(query); } }; List<TimelineObjectHolder<String, Sink>> querySinks = Lists.newArrayList(); for (Interval interval : query.getIntervals()) { querySinks.addAll(sinkTimeline.lookup(interval)); } return toolchest.mergeResults(factory.mergeRunners(EXEC, FunctionalIterable.create(querySinks) .transform(new Function<TimelineObjectHolder<String, Sink>, QueryRunner<T>>() { @Override public QueryRunner<T> apply(TimelineObjectHolder<String, Sink> holder) { final Sink theSink = holder.getObject().getChunk(0).getObject(); return new SpecificSegmentQueryRunner<T>(new MetricsEmittingQueryRunner<T>(emitter, builderFn, factory.mergeRunners(EXEC, Iterables.transform(theSink, new Function<FireHydrant, QueryRunner<T>>() { @Override public QueryRunner<T> apply(FireHydrant input) { return factory.createRunner(input.getSegment()); } }))), new SpecificSegmentSpec(new SegmentDescriptor(holder.getInterval(), theSink.getSegment().getVersion(), theSink.getSegment().getShardSpec().getPartitionNum()))); } }))); } @Override public void persist(final Runnable commitRunnable) { final List<Pair<FireHydrant, Interval>> indexesToPersist = Lists.newArrayList(); for (Sink sink : sinks.values()) { if (sink.swappable()) { indexesToPersist.add(Pair.of(sink.swap(), sink.getInterval())); } } log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource()); persistExecutor.execute(new 
ThreadRenamingRunnable( String.format("%s-incremental-persist", schema.getDataSource())) { @Override public void doRun() { for (Pair<FireHydrant, Interval> pair : indexesToPersist) { metrics.incrementRowOutputCount(persistHydrant(pair.lhs, schema, pair.rhs)); } commitRunnable.run(); } }); } // Submits persist-n-merge task for a Sink to the persistExecutor private void persistAndMerge(final long truncatedTime, final Sink sink) { final String threadName = String.format("%s-%s-persist-n-merge", schema.getDataSource(), new DateTime(truncatedTime)); persistExecutor.execute(new ThreadRenamingRunnable(threadName) { @Override public void doRun() { final Interval interval = sink.getInterval(); for (FireHydrant hydrant : sink) { if (!hydrant.hasSwapped()) { log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink); final int rowCount = persistHydrant(hydrant, schema, interval); metrics.incrementRowOutputCount(rowCount); } } final File mergedTarget = new File(computePersistDir(schema, interval), "merged"); if (mergedTarget.exists()) { log.info("Skipping already-merged sink: %s", sink); return; } File mergedFile = null; try { List<QueryableIndex> indexes = Lists.newArrayList(); for (FireHydrant fireHydrant : sink) { Segment segment = fireHydrant.getSegment(); final QueryableIndex queryableIndex = segment.asQueryableIndex(); log.info("Adding hydrant[%s]", fireHydrant); indexes.add(queryableIndex); } mergedFile = IndexMerger.mergeQueryableIndex(indexes, schema.getAggregators(), mergedTarget); QueryableIndex index = IndexIO.loadIndex(mergedFile); DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment() .withDimensions(Lists.newArrayList(index.getAvailableDimensions()))); segmentPublisher.publishSegment(segment); } catch (IOException e) { log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()) .addData("interval", interval).emit(); } if (mergedFile != null) { try { if (mergedFile != null) { log.info("Deleting Index File[%s]", mergedFile); FileUtils.deleteDirectory(mergedFile); } } catch (IOException e) { log.warn(e, "Error deleting directory[%s]", mergedFile); } } } }); } @Override public void finishJob() { log.info("Shutting down..."); for (final Map.Entry<Long, Sink> entry : sinks.entrySet()) { persistAndMerge(entry.getKey(), entry.getValue()); } while (!sinks.isEmpty()) { try { log.info("Cannot shut down yet! 
Sinks remaining: %s", Joiner.on(", ") .join(Iterables.transform(sinks.values(), new Function<Sink, String>() { @Override public String apply(Sink input) { return input.getSegment().getIdentifier(); } }))); synchronized (handoffCondition) { while (!sinks.isEmpty()) { handoffCondition.wait(); } } } catch (InterruptedException e) { throw Throwables.propagate(e); } } // scheduledExecutor is shutdown here, but persistExecutor is shutdown when the // ServerView sends it a new segment callback if (scheduledExecutor != null) { scheduledExecutor.shutdown(); } stopped = true; } private void initializeExecutors() { if (persistExecutor == null) { persistExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("plumber_persist_%d").build()); } if (scheduledExecutor == null) { scheduledExecutor = Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder() .setDaemon(true).setNameFormat("plumber_scheduled_%d").build()); } } private void bootstrapSinksFromDisk() { for (File sinkDir : computeBaseDir(schema).listFiles()) { Interval sinkInterval = new Interval(sinkDir.getName().replace("_", "/")); //final File[] sinkFiles = sinkDir.listFiles(); // To avoid reading and listing of "merged" dir final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String fileName) { return !(Ints.tryParse(fileName) == null); } }); Arrays.sort(sinkFiles, new Comparator<File>() { @Override public int compare(File o1, File o2) { try { return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())); } catch (NumberFormatException e) { log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2); return o1.compareTo(o2); } } }); try { List<FireHydrant> hydrants = Lists.newArrayList(); for (File segmentDir : sinkFiles) { log.info("Loading previously persisted segment at [%s]", segmentDir); // Although this has been tackled at start of this method. // Just a doubly-check added to skip "merged" dir. from being added to hydrants // If 100% sure that this is not needed, this check can be removed. 
if (Ints.tryParse(segmentDir.getName()) == null) { continue; } hydrants.add( new FireHydrant(new QueryableIndexSegment(null, IndexIO.loadIndex(segmentDir)), Integer.parseInt(segmentDir.getName()))); } Sink currSink = new Sink(sinkInterval, schema, versioningPolicy.getVersion(sinkInterval), hydrants); sinks.put(sinkInterval.getStartMillis(), currSink); sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), new SingleElementPartitionChunk<Sink>(currSink)); segmentAnnouncer.announceSegment(currSink.getSegment()); } catch (IOException e) { log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()) .addData("interval", sinkInterval).emit(); } } } private void registerServerViewCallback() { serverView.registerSegmentCallback(persistExecutor, new ServerView.BaseSegmentCallback() { @Override public ServerView.CallbackAction segmentAdded(DruidServer server, DataSegment segment) { if (stopped) { log.info("Unregistering ServerViewCallback"); persistExecutor.shutdown(); return ServerView.CallbackAction.UNREGISTER; } if ("realtime".equals(server.getType())) { return ServerView.CallbackAction.CONTINUE; } log.debug("Checking segment[%s] on server[%s]", segment, server); if (schema.getDataSource().equals(segment.getDataSource())) { final Interval interval = segment.getInterval(); for (Map.Entry<Long, Sink> entry : sinks.entrySet()) { final Long sinkKey = entry.getKey(); if (interval.contains(sinkKey)) { final Sink sink = entry.getValue(); log.info("Segment[%s] matches sink[%s] on server[%s]", segment, sink, server); final String segmentVersion = segment.getVersion(); final String sinkVersion = sink.getSegment().getVersion(); if (segmentVersion.compareTo(sinkVersion) >= 0) { log.info("Segment version[%s] >= sink version[%s]", segmentVersion, sinkVersion); try { segmentAnnouncer.unannounceSegment(sink.getSegment()); FileUtils .deleteDirectory(computePersistDir(schema, sink.getInterval())); log.info("Removing sinkKey %d for segment %s", sinkKey, sink.getSegment().getIdentifier()); sinks.remove(sinkKey); sinkTimeline.remove(sink.getInterval(), sink.getVersion(), new SingleElementPartitionChunk<Sink>(sink)); synchronized (handoffCondition) { handoffCondition.notifyAll(); } } catch (IOException e) { log.makeAlert(e, "Unable to delete old segment for dataSource[%s].", schema.getDataSource()).addData("interval", sink.getInterval()) .emit(); } } } } } return ServerView.CallbackAction.CONTINUE; } }); } private void startPersistThread() { final long truncatedNow = segmentGranularity.truncate(new DateTime()).getMillis(); final long windowMillis = windowPeriod.toStandardDuration().getMillis(); log.info("Expect to run at [%s]", new DateTime().plus(new Duration(System.currentTimeMillis(), segmentGranularity.increment(truncatedNow) + windowMillis))); ScheduledExecutors.scheduleAtFixedRate(scheduledExecutor, new Duration(System.currentTimeMillis(), segmentGranularity.increment(truncatedNow) + windowMillis), new Duration(truncatedNow, segmentGranularity.increment(truncatedNow)), new ThreadRenamingCallable<ScheduledExecutors.Signal>(String.format("%s-overseer-%d", schema.getDataSource(), schema.getShardSpec().getPartitionNum())) { @Override public ScheduledExecutors.Signal doCall() { if (stopped) { log.info("Stopping merge-n-push overseer thread"); return ScheduledExecutors.Signal.STOP; } log.info("Starting merge and push."); long minTimestamp = segmentGranularity .truncate(rejectionPolicy.getCurrMaxTime().minus(windowMillis)).getMillis(); List<Map.Entry<Long, Sink>> sinksToPush = 
Lists.newArrayList(); for (Map.Entry<Long, Sink> entry : sinks.entrySet()) { final Long intervalStart = entry.getKey(); if (intervalStart < minTimestamp) { log.info("Adding entry[%s] for merge and push.", entry); sinksToPush.add(entry); } } for (final Map.Entry<Long, Sink> entry : sinksToPush) { persistAndMerge(entry.getKey(), entry.getValue()); } if (stopped) { log.info("Stopping merge-n-push overseer thread"); return ScheduledExecutors.Signal.STOP; } else { return ScheduledExecutors.Signal.REPEAT; } } }); } }; }
From source file:io.druid.segment.realtime.plumber.RealtimePlumberSchool.java
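Same pattern as the previous example, in the io.druid package: the comparator parses directory names as integers and uses File.compareTo only when parsing fails.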
@Override public Plumber findPlumber(final Schema schema, final FireDepartmentMetrics metrics) { verifyState();//from w w w .ja va 2 s . com final RejectionPolicy rejectionPolicy = rejectionPolicyFactory.create(windowPeriod); log.info("Creating plumber using rejectionPolicy[%s]", rejectionPolicy); return new Plumber() { private volatile boolean stopped = false; private volatile ExecutorService persistExecutor = null; private volatile ScheduledExecutorService scheduledExecutor = null; private final Map<Long, Sink> sinks = Maps.newConcurrentMap(); private final VersionedIntervalTimeline<String, Sink> sinkTimeline = new VersionedIntervalTimeline<String, Sink>( String.CASE_INSENSITIVE_ORDER); @Override public void startJob() { computeBaseDir(schema).mkdirs(); initializeExecutors(); bootstrapSinksFromDisk(); registerServerViewCallback(); startPersistThread(); } @Override public Sink getSink(long timestamp) { if (!rejectionPolicy.accept(timestamp)) { return null; } final long truncatedTime = segmentGranularity.truncate(timestamp); Sink retVal = sinks.get(truncatedTime); if (retVal == null) { final Interval sinkInterval = new Interval(new DateTime(truncatedTime), segmentGranularity.increment(new DateTime(truncatedTime))); retVal = new Sink(sinkInterval, schema, versioningPolicy.getVersion(sinkInterval)); try { segmentAnnouncer.announceSegment(retVal.getSegment()); sinks.put(truncatedTime, retVal); sinkTimeline.add(retVal.getInterval(), retVal.getVersion(), new SingleElementPartitionChunk<Sink>(retVal)); } catch (IOException e) { log.makeAlert(e, "Failed to announce new segment[%s]", schema.getDataSource()) .addData("interval", retVal.getInterval()).emit(); } } return retVal; } @Override public <T> QueryRunner<T> getQueryRunner(final Query<T> query) { final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query); final QueryToolChest<T, Query<T>> toolchest = factory.getToolchest(); final Function<Query<T>, ServiceMetricEvent.Builder> builderFn = new Function<Query<T>, ServiceMetricEvent.Builder>() { @Override public ServiceMetricEvent.Builder apply(@Nullable Query<T> input) { return toolchest.makeMetricBuilder(query); } }; List<TimelineObjectHolder<String, Sink>> querySinks = Lists.newArrayList(); for (Interval interval : query.getIntervals()) { querySinks.addAll(sinkTimeline.lookup(interval)); } return toolchest.mergeResults( factory.mergeRunners(queryExecutorService, FunctionalIterable.create(querySinks) .transform(new Function<TimelineObjectHolder<String, Sink>, QueryRunner<T>>() { @Override public QueryRunner<T> apply(TimelineObjectHolder<String, Sink> holder) { final Sink theSink = holder.getObject().getChunk(0).getObject(); return new SpecificSegmentQueryRunner<T>(new MetricsEmittingQueryRunner<T>( emitter, builderFn, factory.mergeRunners(MoreExecutors.sameThreadExecutor(), Iterables.transform(theSink, new Function<FireHydrant, QueryRunner<T>>() { @Override public QueryRunner<T> apply(FireHydrant input) { return factory .createRunner(input.getSegment()); } }))), new SpecificSegmentSpec(new SegmentDescriptor(holder.getInterval(), theSink.getSegment().getVersion(), theSink.getSegment().getShardSpec().getPartitionNum()))); } }))); } @Override public void persist(final Runnable commitRunnable) { final List<Pair<FireHydrant, Interval>> indexesToPersist = Lists.newArrayList(); for (Sink sink : sinks.values()) { if (sink.swappable()) { indexesToPersist.add(Pair.of(sink.swap(), sink.getInterval())); } } log.info("Submitting persist runnable for dataSource[%s]", 
schema.getDataSource()); persistExecutor.execute(new ThreadRenamingRunnable( String.format("%s-incremental-persist", schema.getDataSource())) { @Override public void doRun() { for (Pair<FireHydrant, Interval> pair : indexesToPersist) { metrics.incrementRowOutputCount(persistHydrant(pair.lhs, schema, pair.rhs)); } commitRunnable.run(); } }); } // Submits persist-n-merge task for a Sink to the persistExecutor private void persistAndMerge(final long truncatedTime, final Sink sink) { final String threadName = String.format("%s-%s-persist-n-merge", schema.getDataSource(), new DateTime(truncatedTime)); persistExecutor.execute(new ThreadRenamingRunnable(threadName) { @Override public void doRun() { final Interval interval = sink.getInterval(); for (FireHydrant hydrant : sink) { if (!hydrant.hasSwapped()) { log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink); final int rowCount = persistHydrant(hydrant, schema, interval); metrics.incrementRowOutputCount(rowCount); } } final File mergedTarget = new File(computePersistDir(schema, interval), "merged"); if (mergedTarget.exists()) { log.info("Skipping already-merged sink: %s", sink); return; } File mergedFile = null; try { List<QueryableIndex> indexes = Lists.newArrayList(); for (FireHydrant fireHydrant : sink) { Segment segment = fireHydrant.getSegment(); final QueryableIndex queryableIndex = segment.asQueryableIndex(); log.info("Adding hydrant[%s]", fireHydrant); indexes.add(queryableIndex); } mergedFile = IndexMerger.mergeQueryableIndex(indexes, schema.getAggregators(), mergedTarget); QueryableIndex index = IndexIO.loadIndex(mergedFile); DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment() .withDimensions(Lists.newArrayList(index.getAvailableDimensions()))); segmentPublisher.publishSegment(segment); } catch (IOException e) { log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()) .addData("interval", interval).emit(); } if (mergedFile != null) { try { if (mergedFile != null) { log.info("Deleting Index File[%s]", mergedFile); FileUtils.deleteDirectory(mergedFile); } } catch (IOException e) { log.warn(e, "Error deleting directory[%s]", mergedFile); } } } }); } @Override public void finishJob() { log.info("Shutting down..."); for (final Map.Entry<Long, Sink> entry : sinks.entrySet()) { persistAndMerge(entry.getKey(), entry.getValue()); } while (!sinks.isEmpty()) { try { log.info("Cannot shut down yet! 
Sinks remaining: %s", Joiner.on(", ") .join(Iterables.transform(sinks.values(), new Function<Sink, String>() { @Override public String apply(Sink input) { return input.getSegment().getIdentifier(); } }))); synchronized (handoffCondition) { while (!sinks.isEmpty()) { handoffCondition.wait(); } } } catch (InterruptedException e) { throw Throwables.propagate(e); } } // scheduledExecutor is shutdown here, but persistExecutor is shutdown when the // ServerView sends it a new segment callback if (scheduledExecutor != null) { scheduledExecutor.shutdown(); } stopped = true; } private void initializeExecutors() { if (persistExecutor == null) { persistExecutor = Executors.newFixedThreadPool(1, new ThreadFactoryBuilder().setDaemon(true).setNameFormat("plumber_persist_%d").build()); } if (scheduledExecutor == null) { scheduledExecutor = Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder() .setDaemon(true).setNameFormat("plumber_scheduled_%d").build()); } } private void bootstrapSinksFromDisk() { File baseDir = computeBaseDir(schema); if (baseDir == null || !baseDir.exists()) { return; } File[] files = baseDir.listFiles(); if (files == null) { return; } for (File sinkDir : files) { Interval sinkInterval = new Interval(sinkDir.getName().replace("_", "/")); //final File[] sinkFiles = sinkDir.listFiles(); // To avoid reading and listing of "merged" dir final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String fileName) { return !(Ints.tryParse(fileName) == null); } }); Arrays.sort(sinkFiles, new Comparator<File>() { @Override public int compare(File o1, File o2) { try { return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())); } catch (NumberFormatException e) { log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2); return o1.compareTo(o2); } } }); try { List<FireHydrant> hydrants = Lists.newArrayList(); for (File segmentDir : sinkFiles) { log.info("Loading previously persisted segment at [%s]", segmentDir); // Although this has been tackled at start of this method. // Just a doubly-check added to skip "merged" dir. from being added to hydrants // If 100% sure that this is not needed, this check can be removed. 
if (Ints.tryParse(segmentDir.getName()) == null) { continue; } hydrants.add( new FireHydrant(new QueryableIndexSegment(null, IndexIO.loadIndex(segmentDir)), Integer.parseInt(segmentDir.getName()))); } Sink currSink = new Sink(sinkInterval, schema, versioningPolicy.getVersion(sinkInterval), hydrants); sinks.put(sinkInterval.getStartMillis(), currSink); sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), new SingleElementPartitionChunk<Sink>(currSink)); segmentAnnouncer.announceSegment(currSink.getSegment()); } catch (IOException e) { log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()) .addData("interval", sinkInterval).emit(); } } } private void registerServerViewCallback() { serverView.registerSegmentCallback(persistExecutor, new ServerView.BaseSegmentCallback() { @Override public ServerView.CallbackAction segmentAdded(DruidServer server, DataSegment segment) { if (stopped) { log.info("Unregistering ServerViewCallback"); persistExecutor.shutdown(); return ServerView.CallbackAction.UNREGISTER; } if ("realtime".equals(server.getType())) { return ServerView.CallbackAction.CONTINUE; } log.debug("Checking segment[%s] on server[%s]", segment, server); if (schema.getDataSource().equals(segment.getDataSource())) { final Interval interval = segment.getInterval(); for (Map.Entry<Long, Sink> entry : sinks.entrySet()) { final Long sinkKey = entry.getKey(); if (interval.contains(sinkKey)) { final Sink sink = entry.getValue(); log.info("Segment[%s] matches sink[%s] on server[%s]", segment, sink, server); final String segmentVersion = segment.getVersion(); final String sinkVersion = sink.getSegment().getVersion(); if (segmentVersion.compareTo(sinkVersion) >= 0) { log.info("Segment version[%s] >= sink version[%s]", segmentVersion, sinkVersion); try { segmentAnnouncer.unannounceSegment(sink.getSegment()); FileUtils .deleteDirectory(computePersistDir(schema, sink.getInterval())); log.info("Removing sinkKey %d for segment %s", sinkKey, sink.getSegment().getIdentifier()); sinks.remove(sinkKey); sinkTimeline.remove(sink.getInterval(), sink.getVersion(), new SingleElementPartitionChunk<Sink>(sink)); synchronized (handoffCondition) { handoffCondition.notifyAll(); } } catch (IOException e) { log.makeAlert(e, "Unable to delete old segment for dataSource[%s].", schema.getDataSource()).addData("interval", sink.getInterval()) .emit(); } } } } } return ServerView.CallbackAction.CONTINUE; } }); } private void startPersistThread() { final long truncatedNow = segmentGranularity.truncate(new DateTime()).getMillis(); final long windowMillis = windowPeriod.toStandardDuration().getMillis(); log.info("Expect to run at [%s]", new DateTime().plus(new Duration(System.currentTimeMillis(), segmentGranularity.increment(truncatedNow) + windowMillis))); ScheduledExecutors.scheduleAtFixedRate(scheduledExecutor, new Duration(System.currentTimeMillis(), segmentGranularity.increment(truncatedNow) + windowMillis), new Duration(truncatedNow, segmentGranularity.increment(truncatedNow)), new ThreadRenamingCallable<ScheduledExecutors.Signal>(String.format("%s-overseer-%d", schema.getDataSource(), schema.getShardSpec().getPartitionNum())) { @Override public ScheduledExecutors.Signal doCall() { if (stopped) { log.info("Stopping merge-n-push overseer thread"); return ScheduledExecutors.Signal.STOP; } log.info("Starting merge and push."); long minTimestamp = segmentGranularity .truncate(rejectionPolicy.getCurrMaxTime().minus(windowMillis)).getMillis(); List<Map.Entry<Long, Sink>> sinksToPush = 
Lists.newArrayList(); for (Map.Entry<Long, Sink> entry : sinks.entrySet()) { final Long intervalStart = entry.getKey(); if (intervalStart < minTimestamp) { log.info("Adding entry[%s] for merge and push.", entry); sinksToPush.add(entry); } } for (final Map.Entry<Long, Sink> entry : sinksToPush) { persistAndMerge(entry.getKey(), entry.getValue()); } if (stopped) { log.info("Stopping merge-n-push overseer thread"); return ScheduledExecutors.Signal.STOP; } else { return ScheduledExecutors.Signal.REPEAT; } } }); } }; }
From source file:edu.jhu.cvrg.services.nodeDataService.DataStaging.java
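Both the public and the private subject folder arrays are sorted with Comparators that simply delegate to File.compareTo, giving a plain lexicographic ordering by pathname.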
/** Service to retrieve the public meta-data for each subject associated with a userID.
 * Currently uses File class to access the local file system for the information.
 *
 * @param param0 OMElement containing the parameters:<BR/>
 * userId and logindatetime (the later is currently unused)
 *
 * @return OMElements containing a collection of meta-data for each subject:<BR/>
 * status, subjectid, rdtfile, channels, samplingrate, numberofpoints, filename, filesize
 */
public org.apache.axiom.om.OMElement collectSubjectData(org.apache.axiom.om.OMElement param0) {
    if (verbose)
        System.out.println("collectSubjectData() called.");
    int x = 0;
    String sId = null;
    OMElement subject = null, subjectId = null, ecgFile = null, channels = null, samplingRate = null,
            numberOfPoints = null, filename = null, filesize = null;

    OMFactory factory = OMAbstractFactory.getOMFactory();
    OMNamespace dsNs = factory.createOMNamespace("http://www.cvrgrid.org/nodeDataService/", "dataStaging");

    OMElement collectSubjects = factory.createOMElement("collectSubjects", dsNs);
    OMElement status = factory.createOMElement("status", dsNs);
    status.addChild(factory.createOMText("success"));
    collectSubjects.addChild(status);
    OMElement subjects = factory.createOMElement("subjects", dsNs);

    Iterator iterator = param0.getChildren();
    String userId = ((OMElement) iterator.next()).getText();

    //------------------------------------------------------------
    //TODO: replace this with XML database to speed up loading.
    File publicRootFolder = new File(localFtpRoot + sep + userId + sep + "public");
    File privateRootFolder = new File(localFtpRoot + sep + userId + sep + "private");

    File[] publicFolders = publicRootFolder.listFiles();
    File[] privateFolders = privateRootFolder.listFiles();

    try {
        if (publicFolders != null) {
            // sort public folders
            if (publicFolders.length > 0) {
                debugPrintln("Sorting public folders");
                Arrays.sort(publicFolders, new Comparator<File>() {
                    public int compare(File f1, File f2) {
                        return f1.compareTo(f2);
                    }
                });
            }
            // Parse the Public files for ecg meta-data
            // if (verbose) System.out.println("Public files");
            parseFolderArray(publicFolders, subjects, factory, dsNs);
        }

        if (privateFolders != null) {
            // sort private folders.
            if (privateFolders.length > 0) {
                debugPrintln("Sorting private folders");
                Arrays.sort(privateFolders, new Comparator<File>() {
                    public int compare(File f3, File f4) {
                        return f3.compareTo(f4);
                    }
                });
            }
            // Parse the Private files for ecg meta-data
            // if (verbose) System.out.println("Private files");
            parseFolderArray(privateFolders, subjects, factory, dsNs);
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    }

    collectSubjects.addChild(subjects);
    debugPrintln("collectSubjectData() finished, found " + privateFolders.length + " private folders.");

    return collectSubjects;
}
From source file:org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.java
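checkAndUpdate uses compareTo as an equality test on pathnames: memFile.compareTo(diskFile) != 0 and memMetaFile.compareTo(diskMetaFile) != 0 detect when the block file or metadata file recorded in memory differs from the file found by the disk scan.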
/** * Reconcile the difference between blocks on the disk and blocks in * volumeMap/* ww w . ja v a 2 s .c om*/ * <p/> * Check the given block for inconsistencies. Look at the * current state of the block and reconcile the differences as follows: * <ul> * <li>If the block file is missing, delete the block from volumeMap</li> * <li>If the block file exists and the block is missing in volumeMap, * add the block to volumeMap <li> * <li>If generation stamp does not match, then update the block with right * generation stamp</li> * <li>If the block length in memory does not match the actual block file * length * then mark the block as corrupt and update the block length in memory</li> * <li>If the file in {@link ReplicaInfo} does not match the file on * the disk, update {@link ReplicaInfo} with the correct file</li> * </ul> * * @param blockId * Block that differs * @param diskFile * Block file on the disk * @param diskMetaFile * Metadata file from on the disk * @param vol * Volume of the block file */ @Override public void checkAndUpdate(String bpid, long blockId, File diskFile, File diskMetaFile, FsVolumeSpi vol) { Block corruptBlock = null; ReplicaInfo memBlockInfo; synchronized (this) { memBlockInfo = volumeMap.get(bpid, blockId); if (memBlockInfo != null && memBlockInfo.getState() != ReplicaState.FINALIZED) { // Block is not finalized - ignore the difference return; } final long diskGS = diskMetaFile != null && diskMetaFile.exists() ? Block.getGenerationStamp(diskMetaFile.getName()) : GenerationStamp.GRANDFATHER_GENERATION_STAMP; if (diskFile == null || !diskFile.exists()) { if (memBlockInfo == null) { // Block file does not exist and block does not exist in memory // If metadata file exists then delete it if (diskMetaFile != null && diskMetaFile.exists() && diskMetaFile.delete()) { LOG.warn("Deleted a metadata file without a block " + diskMetaFile.getAbsolutePath()); } return; } if (!memBlockInfo.getBlockFile().exists()) { // Block is in memory and not on the disk // Remove the block from volumeMap volumeMap.remove(bpid, blockId); LOG.warn("Removed block " + blockId + " from memory with missing block file on the disk"); // Finally remove the metadata file if (diskMetaFile != null && diskMetaFile.exists() && diskMetaFile.delete()) { LOG.warn("Deleted a metadata file for the deleted block " + diskMetaFile.getAbsolutePath()); } } return; } /* * Block file exists on the disk */ if (memBlockInfo == null) { // Block is missing in memory - add the block to volumeMap ReplicaInfo diskBlockInfo = new FinalizedReplica(blockId, diskFile.length(), diskGS, vol, diskFile.getParentFile()); volumeMap.add(bpid, diskBlockInfo); LOG.warn("Added missing block to memory " + diskBlockInfo); return; } /* * Block exists in volumeMap and the block file exists on the disk */ // Compare block files File memFile = memBlockInfo.getBlockFile(); if (memFile.exists()) { if (memFile.compareTo(diskFile) != 0) { LOG.warn("Block file " + memFile.getAbsolutePath() + " does not match file found by scan " + diskFile.getAbsolutePath()); // TODO: Should the diskFile be deleted? } } else { // Block refers to a block file that does not exist. // Update the block with the file found on the disk. Since the block // file and metadata file are found as a pair on the disk, update // the block based on the metadata file found on the disk LOG.warn("Block file in volumeMap " + memFile.getAbsolutePath() + " does not exist. 
Updating it to the file found during scan " + diskFile.getAbsolutePath()); memBlockInfo.setDir(diskFile.getParentFile()); memFile = diskFile; LOG.warn("Updating generation stamp for block " + blockId + " from " + memBlockInfo.getGenerationStamp() + " to " + diskGS); memBlockInfo.setGenerationStampNoPersistance(diskGS); } // Compare generation stamp if (memBlockInfo.getGenerationStamp() != diskGS) { File memMetaFile = FsDatasetUtil.getMetaFile(diskFile, memBlockInfo.getGenerationStamp()); if (memMetaFile.exists()) { if (memMetaFile.compareTo(diskMetaFile) != 0) { LOG.warn("Metadata file in memory " + memMetaFile.getAbsolutePath() + " does not match file found by scan " + (diskMetaFile == null ? null : diskMetaFile.getAbsolutePath())); } } else { // Metadata file corresponding to block in memory is missing // If metadata file found during the scan is on the same directory // as the block file, then use the generation stamp from it long gs = diskMetaFile != null && diskMetaFile.exists() && diskMetaFile.getParent().equals(memFile.getParent()) ? diskGS : GenerationStamp.GRANDFATHER_GENERATION_STAMP; LOG.warn("Updating generation stamp for block " + blockId + " from " + memBlockInfo.getGenerationStamp() + " to " + gs); memBlockInfo.setGenerationStampNoPersistance(gs); } } // Compare block size if (memBlockInfo.getNumBytes() != memFile.length()) { // Update the length based on the block file corruptBlock = new Block(memBlockInfo); LOG.warn("Updating size of block " + blockId + " from " + memBlockInfo.getNumBytes() + " to " + memFile.length()); memBlockInfo.setNumBytesNoPersistance(memFile.length()); } } // Send corrupt block report outside the lock if (corruptBlock != null) { LOG.warn("Reporting the block " + corruptBlock + " as corrupt due to length mismatch"); try { datanode.reportBadBlocks(new ExtendedBlock(bpid, corruptBlock)); } catch (IOException e) { LOG.warn("Failed to repot bad block " + corruptBlock, e); } } }
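Using compareTo() == 0 as a pathname equality test, as the Hadoop example does, matches File.equals, which in the reference JDK implementation is built on the same platform-dependent pathname comparison. A small sketch with made-up paths (the files do not need to exist, since compareTo operates on abstract pathnames, not on disk contents):

import java.io.File;

public class PathnameEqualityCheck {
    public static void main(String[] args) {
        // Made-up paths; neither file has to exist on disk.
        File memFile = new File("/data/dn/current/blk_1001");
        File diskFile = new File("/data/dn/current/blk_1001");
        File otherFile = new File("/data/dn/current/blk_1002");

        // Zero means the two abstract pathnames denote the same path.
        System.out.println(memFile.compareTo(diskFile) == 0); // true
        System.out.println(memFile.equals(diskFile));         // true, same comparison underneath

        // A non-zero result flags a mismatch, as in checkAndUpdate above.
        System.out.println(memFile.compareTo(otherFile) != 0); // true
    }
}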