List of usage examples for org.joda.time.Interval.getStart()
public DateTime getStart()
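For reference, a minimal standalone sketch of the method itself (plain Joda-Time, independent of the Druid examples below; the interval values are illustrative): getStart() returns the start of the interval as a DateTime, representing the same instant as getStartMillis().

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class IntervalGetStartExample {
    public static void main(String[] args) {
        // Hypothetical one-day interval; the dates are illustrative only.
        Interval interval = new Interval(
                new DateTime(2020, 1, 1, 0, 0, DateTimeZone.UTC),
                new DateTime(2020, 1, 2, 0, 0, DateTimeZone.UTC));

        DateTime start = interval.getStart();   // 2020-01-01T00:00:00.000Z
        long startMillis = start.getMillis();   // same instant as interval.getStartMillis()

        System.out.println(start);
        System.out.println(startMillis == interval.getStartMillis()); // true
    }
}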
From source file: org.apache.druid.indexer.DetermineHashedPartitionsJob.java
License: Apache License
@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        startTime = System.currentTimeMillis();
        groupByJob = Job.getInstance(new Configuration(), StringUtils
                .format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals()));

        JobHelper.injectSystemProperties(groupByJob);
        config.addJobProperties(groupByJob);
        groupByJob.setMapperClass(DetermineCardinalityMapper.class);
        groupByJob.setMapOutputKeyClass(LongWritable.class);
        groupByJob.setMapOutputValueClass(BytesWritable.class);
        groupByJob.setReducerClass(DetermineCardinalityReducer.class);
        groupByJob.setOutputKeyClass(NullWritable.class);
        groupByJob.setOutputValueClass(NullWritable.class);
        groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        groupByJob.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
        if (!config.getSegmentGranularIntervals().isPresent()) {
            groupByJob.setNumReduceTasks(1);
        } else {
            groupByJob.setNumReduceTasks(config.getSegmentGranularIntervals().get().size());
        }
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

        config.addInputPaths(groupByJob);
        config.intoConfiguration(groupByJob);
        FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

        groupByJob.submit();
        log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                groupByJob.getTrackingURL());

        // Store the jobId in the file
        if (groupByJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
        }

        try {
            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob,
                    config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }

        /*
         * Load partitions and intervals determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        if (!config.getSegmentGranularIntervals().isPresent()) {
            final Path intervalInfoPath = config.makeIntervalInfoPath();
            fileSystem = intervalInfoPath.getFileSystem(groupByJob.getConfiguration());
            if (!Utils.exists(groupByJob, fileSystem, intervalInfoPath)) {
                throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
            }
            List<Interval> intervals = config.JSON_MAPPER.readValue(
                    Utils.openInputStream(groupByJob, intervalInfoPath),
                    new TypeReference<List<Interval>>() {});
            config.setGranularitySpec(
                    new UniformGranularitySpec(config.getGranularitySpec().getSegmentGranularity(),
                            config.getGranularitySpec().getQueryGranularity(),
                            config.getGranularitySpec().isRollup(), intervals));
            log.info("Determined Intervals for Job [%s].", config.getSegmentGranularIntervals());
        }

        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(groupByJob.getConfiguration());
            }
            if (Utils.exists(groupByJob, fileSystem, partitionInfoPath)) {
                final Long numRows = config.JSON_MAPPER
                        .readValue(Utils.openInputStream(groupByJob, partitionInfoPath), Long.class);

                log.info("Found approximately [%,d] rows in data.", numRows);

                final int numberOfShards = (int) Math.ceil((double) numRows / config.getTargetPartitionSize());

                log.info("Creating [%,d] shards", numberOfShards);

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
                for (int i = 0; i < numberOfShards; ++i) {
                    actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards, null,
                            HadoopDruidIndexerConfig.JSON_MAPPER), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket.getMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }

        config.setShardSpecs(shardSpecs);
        log.info("DetermineHashedPartitionsJob took %d millis", (System.currentTimeMillis() - startTime));

        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file: org.apache.druid.indexer.HadoopDruidDetermineConfigurationJob.java
License: Apache License
@Override
public boolean run() {
    JobHelper.ensurePaths(config);

    if (config.isDeterminingPartitions()) {
        job = createPartitionJob(config);
        config.setHadoopJobIdFileName(hadoopJobIdFile);
        return JobHelper.runSingleJob(job, config);
    } else {
        final PartitionsSpec partitionsSpec = config.getPartitionsSpec();
        final int shardsPerInterval;
        if (partitionsSpec instanceof HashedPartitionsSpec) {
            final HashedPartitionsSpec hashedPartitionsSpec = (HashedPartitionsSpec) partitionsSpec;
            shardsPerInterval = PartitionsSpec.isEffectivelyNull(hashedPartitionsSpec.getNumShards()) ? 1
                    : hashedPartitionsSpec.getNumShards();
        } else {
            shardsPerInterval = 1;
        }
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            DateTime bucket = segmentGranularity.getStart();
            // negative shardsPerInterval means a single shard
            List<HadoopyShardSpec> specs = Lists.newArrayListWithCapacity(shardsPerInterval);
            for (int i = 0; i < shardsPerInterval; i++) {
                specs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, shardsPerInterval,
                        config.getPartitionsSpec().getPartitionDimensions(),
                        HadoopDruidIndexerConfig.JSON_MAPPER), shardCount++));
            }
            shardSpecs.put(bucket.getMillis(), specs);
            log.info("DateTime[%s], spec[%s]", bucket, specs);
        }
        config.setShardSpecs(shardSpecs);

        return true;
    }
}
From source file: org.apache.druid.indexer.HadoopDruidIndexerConfig.java
License: Apache License
public Optional<Iterable<Bucket>> getAllBuckets() {
    Optional<Set<Interval>> intervals = getSegmentGranularIntervals();
    if (intervals.isPresent()) {
        return Optional.of((Iterable<Bucket>) FunctionalIterable.create(intervals.get())
                .transformCat(new Function<Interval, Iterable<Bucket>>() {
                    @Override
                    public Iterable<Bucket> apply(Interval input) {
                        final DateTime bucketTime = input.getStart();
                        final List<HadoopyShardSpec> specs = schema.getTuningConfig().getShardSpecs()
                                .get(bucketTime.getMillis());
                        if (specs == null) {
                            return ImmutableList.of();
                        }

                        return FunctionalIterable.create(specs)
                                .transform(new Function<HadoopyShardSpec, Bucket>() {
                                    int i = 0;

                                    @Override
                                    public Bucket apply(HadoopyShardSpec input) {
                                        return new Bucket(input.getShardNum(), bucketTime, i++);
                                    }
                                });
                    }
                }));
    } else {
        return Optional.absent();
    }
}
From source file: org.apache.druid.indexer.HadoopDruidIndexerConfig.java
License: Apache License
public Path makeSegmentPartitionInfoPath(Interval bucketInterval) {
    return new Path(StringUtils.format("%s/%s_%s/partitions.json", makeIntermediatePath(),
            ISODateTimeFormat.basicDateTime().print(bucketInterval.getStart()),
            ISODateTimeFormat.basicDateTime().print(bucketInterval.getEnd())));
}
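As a rough illustration (not part of the Druid source), the basicDateTime formatter above renders getStart() and getEnd() as compact timestamps, so the directory segment of the partition path comes out looking something like 20200101T000000.000Z_20200102T000000.000Z. A minimal sketch with an assumed interval:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.format.ISODateTimeFormat;

public class PartitionPathSketch {
    public static void main(String[] args) {
        // Illustrative interval; in the job, intervals come from the indexer config.
        Interval bucketInterval = new Interval(
                new DateTime(2020, 1, 1, 0, 0, DateTimeZone.UTC),
                new DateTime(2020, 1, 2, 0, 0, DateTimeZone.UTC));

        String dirName = ISODateTimeFormat.basicDateTime().print(bucketInterval.getStart())
                + "_"
                + ISODateTimeFormat.basicDateTime().print(bucketInterval.getEnd());

        // Prints something like: 20200101T000000.000Z_20200102T000000.000Z
        System.out.println(dirName);
    }
}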
From source file: org.apache.druid.indexer.path.GranularityPathSpec.java
License: Apache License
@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    final Set<Interval> intervals = new TreeSet<>(Comparators.intervalsByStartThenEnd());
    for (Interval inputInterval : config.getInputIntervals()) {
        for (Interval interval : dataGranularity.getIterable(inputInterval)) {
            intervals.add(trim(inputInterval, interval));
        }
    }

    Path betaInput = new Path(inputPath);
    FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
    Set<String> paths = new TreeSet<>();
    Pattern fileMatcher = Pattern.compile(filePattern);

    DateTimeFormatter customFormatter = null;
    if (pathFormat != null) {
        customFormatter = DateTimeFormat.forPattern(pathFormat);
    }

    for (Interval interval : intervals) {
        DateTime t = interval.getStart();
        String intervalPath;
        if (customFormatter != null) {
            intervalPath = customFormatter.print(t);
        } else {
            intervalPath = dataGranularity.toPath(t);
        }

        Path granularPath = new Path(betaInput, intervalPath);
        log.info("Checking path[%s]", granularPath);
        for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
            final Path filePath = status.getPath();
            if (fileMatcher.matcher(filePath.toString()).matches()) {
                paths.add(filePath.toString());
            }
        }
    }

    log.info("Appending path %s", paths);
    StaticPathSpec.addToMultipleInputs(config, job, paths, inputFormat);

    return job;
}
From source file: org.apache.druid.indexing.common.task.AbstractTask.java
License: Apache License
static String getOrMakeId(String id, final String typeName, String dataSource, @Nullable Interval interval) {
    if (id != null) {
        return id;
    }

    final List<Object> objects = new ArrayList<>();
    objects.add(typeName);
    objects.add(dataSource);
    if (interval != null) {
        objects.add(interval.getStart());
        objects.add(interval.getEnd());
    }
    objects.add(DateTimes.nowUtc().toString());

    return joinId(objects);
}
From source file: org.apache.druid.indexing.common.task.batch.parallel.PartialSegmentMergeTask.java
License: Apache License
private Map<Interval, Int2ObjectMap<List<File>>> fetchSegmentFiles(TaskToolbox toolbox,
        Map<Interval, Int2ObjectMap<List<PartitionLocation>>> intervalToPartitions) throws IOException {
    final File tempDir = toolbox.getFirehoseTemporaryDir();
    FileUtils.deleteQuietly(tempDir);
    FileUtils.forceMkdir(tempDir);

    final Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles = new HashMap<>();
    // Fetch partition files
    for (Entry<Interval, Int2ObjectMap<List<PartitionLocation>>> entryPerInterval : intervalToPartitions
            .entrySet()) {
        final Interval interval = entryPerInterval.getKey();
        for (Int2ObjectMap.Entry<List<PartitionLocation>> entryPerPartitionId : entryPerInterval.getValue()
                .int2ObjectEntrySet()) {
            final int partitionId = entryPerPartitionId.getIntKey();
            final File partitionDir = FileUtils.getFile(tempDir, interval.getStart().toString(),
                    interval.getEnd().toString(), Integer.toString(partitionId));
            FileUtils.forceMkdir(partitionDir);
            for (PartitionLocation location : entryPerPartitionId.getValue()) {
                final File zippedFile = fetchSegmentFile(partitionDir, location);
                try {
                    final File unzippedDir = new File(partitionDir,
                            StringUtils.format("unzipped_%s", location.getSubTaskId()));
                    FileUtils.forceMkdir(unzippedDir);
                    CompressionUtils.unzip(zippedFile, unzippedDir);
                    intervalToUnzippedFiles.computeIfAbsent(interval, k -> new Int2ObjectOpenHashMap<>())
                            .computeIfAbsent(partitionId, k -> new ArrayList<>()).add(unzippedDir);
                } finally {
                    if (!zippedFile.delete()) {
                        LOG.warn("Failed to delete temp file[%s]", zippedFile);
                    }
                }
            }
        }
    }
    return intervalToUnzippedFiles;
}
From source file: org.apache.druid.indexing.common.task.ConvertSegmentTask.java
License: Apache License
protected static String makeId(String dataSource, Interval interval) {
    Preconditions.checkNotNull(dataSource, "dataSource");
    Preconditions.checkNotNull(interval, "interval");
    return joinId(TYPE, dataSource, interval.getStart(), interval.getEnd(), DateTimes.nowUtc());
}
From source file: org.apache.druid.indexing.common.task.SameIntervalMergeTask.java
License: Apache License
public static String makeId(String id, final String typeName, String dataSource, Interval interval) {
    return id != null ? id
            : joinId(typeName, dataSource, interval.getStart(), interval.getEnd(),
                    DateTimes.nowUtc().toString());
}
From source file: org.apache.druid.indexing.overlord.IndexerMetadataStorageAdapter.java
License: Apache License
public int deletePendingSegments(String dataSource, Interval deleteInterval) {
    // Check that the given interval does not overlap the interval (minCreatedDateOfActiveTasks, MAX)
    final Optional<DateTime> minCreatedDateOfActiveTasks = taskStorageQueryAdapter.getActiveTaskInfo(dataSource)
            .stream().map(TaskInfo::getCreatedTime).min(Comparator.naturalOrder());
    final Interval activeTaskInterval = new Interval(minCreatedDateOfActiveTasks.orElse(DateTimes.MAX),
            DateTimes.MAX);

    Preconditions.checkArgument(!deleteInterval.overlaps(activeTaskInterval),
            "Cannot delete pendingSegments because there is at least one active task created at %s",
            activeTaskInterval.getStart());

    return indexerMetadataStorageCoordinator.deletePendingSegments(dataSource, deleteInterval);
}