List of usage examples for org.joda.time.Interval.getStart()
public DateTime getStart()
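Before the project-specific examples, here is a minimal standalone sketch of what getStart() returns; the class name and interval endpoints below are illustrative values, not taken from any of the sources that follow.

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class IntervalGetStartExample {
  public static void main(String[] args) {
    // Endpoint values are illustrative only.
    DateTime start = new DateTime(2013, 1, 1, 0, 0, 0, 0, DateTimeZone.UTC);
    DateTime end = new DateTime(2013, 1, 2, 0, 0, 0, 0, DateTimeZone.UTC);
    Interval interval = new Interval(start, end);

    // getStart() returns the interval's start instant as a DateTime
    // in the interval's chronology (UTC here); getEnd() is its counterpart.
    System.out.println(interval.getStart()); // 2013-01-01T00:00:00.000Z
    System.out.println(interval.getEnd());   // 2013-01-02T00:00:00.000Z
  }
}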
From source file:com.metamx.druid.index.v1.MMappedIndexStorageAdapter.java
License:Open Source License
@Override
public Iterable<Cursor> makeCursors(Filter filter, Interval interval, QueryGranularity gran) {
  Interval actualInterval = interval;
  if (!actualInterval.overlaps(index.dataInterval)) {
    return ImmutableList.of();
  }

  if (actualInterval.getStart().isBefore(index.dataInterval.getStart())) {
    actualInterval = actualInterval.withStart(index.dataInterval.getStart());
  }
  if (actualInterval.getEnd().isAfter(index.dataInterval.getEnd())) {
    actualInterval = actualInterval.withEnd(index.dataInterval.getEnd());
  }

  final Iterable<Cursor> iterable;
  if (filter == null) {
    iterable = new NoFilterCursorIterable(index, actualInterval, gran);
  } else {
    Offset offset = new ConciseOffset(filter.goConcise(new MMappedBitmapIndexSelector(index)));
    iterable = new CursorIterable(index, actualInterval, gran, offset);
  }

  return FunctionalIterable.create(iterable).keep(Functions.<Cursor>identity());
}
From source file:com.metamx.druid.index.v1.QueryableIndexStorageAdapter.java
License:Open Source License
@Override
public Iterable<Cursor> makeCursors(Filter filter, Interval interval, QueryGranularity gran) {
  Interval actualInterval = interval;
  final Interval dataInterval = getInterval();
  if (!actualInterval.overlaps(dataInterval)) {
    return ImmutableList.of();
  }

  if (actualInterval.getStart().isBefore(dataInterval.getStart())) {
    actualInterval = actualInterval.withStart(dataInterval.getStart());
  }
  if (actualInterval.getEnd().isAfter(dataInterval.getEnd())) {
    actualInterval = actualInterval.withEnd(dataInterval.getEnd());
  }

  final Iterable<Cursor> iterable;
  if (filter == null) {
    iterable = new NoFilterCursorIterable(index, actualInterval, gran);
  } else {
    Offset offset = new ConciseOffset(filter.goConcise(new MMappedBitmapIndexSelector(index)));
    iterable = new CursorIterable(index, actualInterval, gran, offset);
  }

  return FunctionalIterable.create(iterable).keep(Functions.<Cursor>identity());
}
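Both storage-adapter examples above use getStart()/getEnd() together with withStart()/withEnd() to clamp the requested query interval to the interval actually covered by the data. A stripped-down sketch of that idiom, with made-up class name and interval values:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class ClampIntervalExample {
  // Clamp 'requested' so it never extends outside 'data'; assumes the two overlap.
  static Interval clamp(Interval requested, Interval data) {
    Interval actual = requested;
    if (actual.getStart().isBefore(data.getStart())) {
      actual = actual.withStart(data.getStart());
    }
    if (actual.getEnd().isAfter(data.getEnd())) {
      actual = actual.withEnd(data.getEnd());
    }
    return actual;
  }

  public static void main(String[] args) {
    Interval data = new Interval(
        new DateTime(2013, 1, 5, 0, 0, 0, 0, DateTimeZone.UTC),
        new DateTime(2013, 1, 10, 0, 0, 0, 0, DateTimeZone.UTC));
    Interval requested = new Interval(
        new DateTime(2013, 1, 1, 0, 0, 0, 0, DateTimeZone.UTC),
        new DateTime(2013, 1, 7, 0, 0, 0, 0, DateTimeZone.UTC));

    System.out.println(clamp(requested, data)); // 2013-01-05T00:00:00.000Z/2013-01-07T00:00:00.000Z
  }
}

Joda-Time also offers Interval.overlap(ReadableInterval), which returns the intersection directly (or null when the intervals do not overlap), as a one-call alternative to this pattern; the adapters instead check overlaps() up front and return an empty result when there is no overlap.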
From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java
License:Open Source License
public boolean run() {
  try {
    /*
     * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
     * in the final segment.
     */
    if (!config.getPartitionsSpec().isAssumeGrouped()) {
      final Job groupByJob = new Job(
          new Configuration(),
          String.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

      injectSystemProperties(groupByJob);
      groupByJob.setInputFormatClass(TextInputFormat.class);
      groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
      groupByJob.setMapOutputKeyClass(BytesWritable.class);
      groupByJob.setMapOutputValueClass(NullWritable.class);
      groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
      groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
      groupByJob.setOutputKeyClass(BytesWritable.class);
      groupByJob.setOutputValueClass(NullWritable.class);
      groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
      groupByJob.setJarByClass(DeterminePartitionsJob.class);

      config.addInputPaths(groupByJob);
      config.intoConfiguration(groupByJob);
      FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

      groupByJob.submit();
      log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());

      if (!groupByJob.waitForCompletion(true)) {
        log.error("Job failed: %s", groupByJob.getJobID());
        return false;
      }
    } else {
      log.info("Skipping group-by job.");
    }

    /*
     * Read grouped data and determine appropriate partitions.
     */
    final Job dimSelectionJob = new Job(
        new Configuration(),
        String.format("%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

    dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

    injectSystemProperties(dimSelectionJob);

    if (!config.getPartitionsSpec().isAssumeGrouped()) {
      // Read grouped data from the groupByJob.
      dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
      dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
      FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
    } else {
      // Directly read the source data, since we assume it's already grouped.
      dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
      dimSelectionJob.setInputFormatClass(TextInputFormat.class);
      config.addInputPaths(dimSelectionJob);
    }

    SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
    dimSelectionJob.setMapOutputValueClass(Text.class);
    dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
    dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
    dimSelectionJob.setOutputKeyClass(BytesWritable.class);
    dimSelectionJob.setOutputValueClass(Text.class);
    dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
    dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

    config.intoConfiguration(dimSelectionJob);
    FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

    dimSelectionJob.submit();
    log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL());

    if (!dimSelectionJob.waitForCompletion(true)) {
      log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
      return false;
    }

    /*
     * Load partitions determined by the previous job.
     */
    log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
    FileSystem fileSystem = null;
    Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
    int shardCount = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
      DateTime bucket = segmentGranularity.getStart();
      final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
      if (fileSystem == null) {
        fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
      }
      if (fileSystem.exists(partitionInfoPath)) {
        List<ShardSpec> specs = config.jsonMapper.readValue(
            Utils.openInputStream(dimSelectionJob, partitionInfoPath),
            new TypeReference<List<ShardSpec>>() {});

        List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
        for (int i = 0; i < specs.size(); ++i) {
          actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
          log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
        }

        shardSpecs.put(bucket, actualSpecs);
      } else {
        log.info("Path[%s] didn't exist!?", partitionInfoPath);
      }
    }

    config.setShardSpecs(shardSpecs);
    return true;
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
From source file:com.metamx.druid.indexer.HadoopDruidIndexerConfig.java
License:Open Source License
public Iterable<Bucket> getAllBuckets() {
  return FunctionalIterable.create(getSegmentGranularIntervals())
      .transformCat(
          new Function<Interval, Iterable<Bucket>>() {
            @Override
            public Iterable<Bucket> apply(Interval input) {
              final DateTime bucketTime = input.getStart();
              final List<HadoopyShardSpec> specs = shardSpecs.get(bucketTime);
              if (specs == null) {
                return ImmutableList.of();
              }

              return FunctionalIterable.create(specs).transform(
                  new Function<HadoopyShardSpec, Bucket>() {
                    int i = 0;

                    @Override
                    public Bucket apply(HadoopyShardSpec input) {
                      return new Bucket(input.getShardNum(), bucketTime, i++);
                    }
                  });
            }
          });
}
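A recurring pattern in these indexer examples is keying per-segment state by the segment interval's start: interval.getStart() serves both as the bucket timestamp and as the map key for shard specs. A simplified, hypothetical sketch of that lookup, using plain String placeholders instead of HadoopyShardSpec:

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class BucketKeyExample {
  public static void main(String[] args) {
    // Shard specs keyed by the start of each segment-granularity interval,
    // ordered by instant via DateTimeComparator.
    Map<DateTime, List<String>> shardSpecs =
        new TreeMap<DateTime, List<String>>(DateTimeComparator.getInstance());

    Interval segment = new Interval(
        new DateTime(2013, 1, 1, 0, 0, 0, 0, DateTimeZone.UTC),
        new DateTime(2013, 1, 2, 0, 0, 0, 0, DateTimeZone.UTC));

    // The interval start doubles as the bucket timestamp and the map key.
    DateTime bucketTime = segment.getStart();
    shardSpecs.put(bucketTime, Arrays.asList("shard-0", "shard-1"));

    List<String> specs = shardSpecs.get(segment.getStart());
    System.out.println(bucketTime + " -> " + specs); // 2013-01-01T00:00:00.000Z -> [shard-0, shard-1]
  }
}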
From source file:com.metamx.druid.indexer.HadoopDruidIndexerConfig.java
License:Open Source License
public Path makeSegmentPartitionInfoPath(Bucket bucket) {
  final Interval bucketInterval = getGranularitySpec().bucketInterval(bucket.time).get();

  return new Path(
      String.format(
          "%s/%s_%s/partitions.json",
          makeIntermediatePath(),
          ISODateTimeFormat.basicDateTime().print(bucketInterval.getStart()),
          ISODateTimeFormat.basicDateTime().print(bucketInterval.getEnd())));
}
From source file:com.metamx.druid.indexer.HadoopDruidIndexerConfig.java
License:Open Source License
public Path makeSegmentOutputPath(Bucket bucket) {
  final Interval bucketInterval = getGranularitySpec().bucketInterval(bucket.time).get();

  return new Path(
      String.format(
          "%s/%s_%s/%s/%s",
          getSegmentOutputDir(),
          bucketInterval.getStart().toString(),
          bucketInterval.getEnd().toString(),
          getVersion().toString(),
          bucket.partitionNum));
}
From source file:com.metamx.druid.indexer.HadoopDruidIndexerJob.java
License:Open Source License
@Override
public boolean run() {
  List<Jobby> jobs = Lists.newArrayList();

  ensurePaths();

  if (config.partitionByDimension()) {
    jobs.add(new DeterminePartitionsJob(config));
  } else {
    Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
    int shardCount = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
      DateTime bucket = segmentGranularity.getStart();
      final HadoopyShardSpec spec = new HadoopyShardSpec(new NoneShardSpec(), shardCount++);
      shardSpecs.put(bucket, Lists.newArrayList(spec));
      log.info("DateTime[%s], spec[%s]", bucket, spec);
    }
    config.setShardSpecs(shardSpecs);
  }

  indexJob = new IndexGeneratorJob(config);
  jobs.add(indexJob);

  if (dbUpdaterJob != null) {
    jobs.add(dbUpdaterJob);
  } else {
    log.info("No updaterJobSpec set, not uploading to database");
  }

  String failedMessage = null;
  for (Jobby job : jobs) {
    if (failedMessage == null) {
      if (!job.run()) {
        failedMessage = String.format("Job[%s] failed!", job.getClass());
      }
    }
  }

  if (!config.isLeaveIntermediate()) {
    if (failedMessage == null || config.isCleanupOnFailure()) {
      Path workingPath = config.makeIntermediatePath();
      log.info("Deleting path[%s]", workingPath);
      try {
        workingPath.getFileSystem(new Configuration()).delete(workingPath, true);
      } catch (IOException e) {
        log.error(e, "Failed to cleanup path[%s]", workingPath);
      }
    }
  }

  if (failedMessage != null) {
    throw new ISE(failedMessage);
  }

  return true;
}
From source file:com.metamx.druid.indexer.path.GranularityPathSpec.java
License:Open Source License
@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
  final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
  for (Interval segmentInterval : config.getSegmentGranularIntervals()) {
    for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
      intervals.add(dataInterval);
    }
  }

  Path betaInput = new Path(inputPath);
  FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
  Set<String> paths = Sets.newTreeSet();
  Pattern fileMatcher = Pattern.compile(filePattern);

  DateTimeFormatter customFormatter = null;
  if (pathFormat != null) {
    customFormatter = DateTimeFormat.forPattern(pathFormat);
  }

  for (Interval interval : intervals) {
    DateTime t = interval.getStart();
    String intervalPath = null;
    if (customFormatter != null) {
      intervalPath = customFormatter.print(t);
    } else {
      intervalPath = dataGranularity.toPath(t);
    }

    Path granularPath = new Path(betaInput, intervalPath);
    log.info("Checking path[%s]", granularPath);
    for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
      final Path filePath = status.getPath();
      if (fileMatcher.matcher(filePath.toString()).matches()) {
        paths.add(filePath.toString());
      }
    }
  }

  for (String path : paths) {
    log.info("Appending path[%s]", path);
    FileInputFormat.addInputPath(job, new Path(path));
  }

  return job;
}
From source file:com.metamx.druid.indexing.common.task.DeleteTask.java
License:Open Source License
@JsonCreator
public DeleteTask(
    @JsonProperty("id") String id,
    @JsonProperty("dataSource") String dataSource,
    @JsonProperty("interval") Interval interval)
{
  super(
      id != null ? id : String.format(
          "delete_%s_%s_%s_%s",
          dataSource,
          interval.getStart(),
          interval.getEnd(),
          new DateTime().toString()),
      dataSource,
      Preconditions.checkNotNull(interval, "interval"));
}
From source file:com.metamx.druid.indexing.common.task.IndexDeterminePartitionsTask.java
License:Open Source License
@JsonCreator
public IndexDeterminePartitionsTask(
    @JsonProperty("id") String id,
    @JsonProperty("groupId") String groupId,
    @JsonProperty("interval") Interval interval,
    @JsonProperty("firehose") FirehoseFactory firehoseFactory,
    @JsonProperty("schema") Schema schema,
    @JsonProperty("targetPartitionSize") long targetPartitionSize,
    @JsonProperty("rowFlushBoundary") int rowFlushBoundary)
{
  super(
      id != null ? id : makeTaskId(groupId, interval.getStart(), interval.getEnd()),
      groupId,
      schema.getDataSource(),
      Preconditions.checkNotNull(interval, "interval"));

  this.firehoseFactory = firehoseFactory;
  this.schema = schema;
  this.targetPartitionSize = targetPartitionSize;
  this.rowFlushBoundary = rowFlushBoundary;
}