List of usage examples for org.joda.time.Interval.getStart()
public DateTime getStart()
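For reference, a minimal standalone sketch of the method itself (plain Joda-Time, independent of the Druid examples below; the interval values are illustrative): getStart() returns the start of the interval as a DateTime, representing the same instant as getStartMillis().

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class IntervalGetStartExample {
    public static void main(String[] args) {
        // Hypothetical one-day interval; the dates are illustrative only.
        Interval interval = new Interval(
                new DateTime(2020, 1, 1, 0, 0, DateTimeZone.UTC),
                new DateTime(2020, 1, 2, 0, 0, DateTimeZone.UTC));

        DateTime start = interval.getStart();   // 2020-01-01T00:00:00.000Z
        long startMillis = start.getMillis();   // same instant as interval.getStartMillis()

        System.out.println(start);
        System.out.println(startMillis == interval.getStartMillis()); // true
    }
}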
From source file: org.apache.druid.indexer.DetermineHashedPartitionsJob.java
License: Apache License
@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        startTime = System.currentTimeMillis();
        groupByJob = Job.getInstance(new Configuration(), StringUtils
                .format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals()));

        JobHelper.injectSystemProperties(groupByJob);
        config.addJobProperties(groupByJob);
        groupByJob.setMapperClass(DetermineCardinalityMapper.class);
        groupByJob.setMapOutputKeyClass(LongWritable.class);
        groupByJob.setMapOutputValueClass(BytesWritable.class);
        groupByJob.setReducerClass(DetermineCardinalityReducer.class);
        groupByJob.setOutputKeyClass(NullWritable.class);
        groupByJob.setOutputValueClass(NullWritable.class);
        groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        groupByJob.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
        if (!config.getSegmentGranularIntervals().isPresent()) {
            groupByJob.setNumReduceTasks(1);
        } else {
            groupByJob.setNumReduceTasks(config.getSegmentGranularIntervals().get().size());
        }
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

        config.addInputPaths(groupByJob);
        config.intoConfiguration(groupByJob);
        FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

        groupByJob.submit();
        log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                groupByJob.getTrackingURL());

        // Store the jobId in the file
        if (groupByJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
        }

        try {
            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob,
                    config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }

        /*
         * Load partitions and intervals determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        if (!config.getSegmentGranularIntervals().isPresent()) {
            final Path intervalInfoPath = config.makeIntervalInfoPath();
            fileSystem = intervalInfoPath.getFileSystem(groupByJob.getConfiguration());
            if (!Utils.exists(groupByJob, fileSystem, intervalInfoPath)) {
                throw new ISE("Path[%s] didn't exist!?", intervalInfoPath);
            }
            List<Interval> intervals = config.JSON_MAPPER.readValue(
                    Utils.openInputStream(groupByJob, intervalInfoPath),
                    new TypeReference<List<Interval>>() {});
            config.setGranularitySpec(
                    new UniformGranularitySpec(config.getGranularitySpec().getSegmentGranularity(),
                            config.getGranularitySpec().getQueryGranularity(),
                            config.getGranularitySpec().isRollup(), intervals));
            log.info("Determined Intervals for Job [%s].", config.getSegmentGranularIntervals());
        }

        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(groupByJob.getConfiguration());
            }
            if (Utils.exists(groupByJob, fileSystem, partitionInfoPath)) {
                final Long numRows = config.JSON_MAPPER
                        .readValue(Utils.openInputStream(groupByJob, partitionInfoPath), Long.class);

                log.info("Found approximately [%,d] rows in data.", numRows);

                final int numberOfShards = (int) Math.ceil((double) numRows / config.getTargetPartitionSize());

                log.info("Creating [%,d] shards", numberOfShards);

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(numberOfShards);
                for (int i = 0; i < numberOfShards; ++i) {
                    actualSpecs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, numberOfShards, null,
                            HadoopDruidIndexerConfig.JSON_MAPPER), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket.getMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }

        config.setShardSpecs(shardSpecs);
        log.info("DetermineHashedPartitionsJob took %d millis", (System.currentTimeMillis() - startTime));

        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file: org.apache.druid.indexer.HadoopDruidDetermineConfigurationJob.java
License: Apache License
@Override
public boolean run() {
    JobHelper.ensurePaths(config);

    if (config.isDeterminingPartitions()) {
        job = createPartitionJob(config);
        config.setHadoopJobIdFileName(hadoopJobIdFile);
        return JobHelper.runSingleJob(job, config);
    } else {
        final PartitionsSpec partitionsSpec = config.getPartitionsSpec();
        final int shardsPerInterval;
        if (partitionsSpec instanceof HashedPartitionsSpec) {
            final HashedPartitionsSpec hashedPartitionsSpec = (HashedPartitionsSpec) partitionsSpec;
            shardsPerInterval = PartitionsSpec.isEffectivelyNull(hashedPartitionsSpec.getNumShards()) ? 1
                    : hashedPartitionsSpec.getNumShards();
        } else {
            shardsPerInterval = 1;
        }
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            DateTime bucket = segmentGranularity.getStart();
            // negative shardsPerInterval means a single shard
            List<HadoopyShardSpec> specs = Lists.newArrayListWithCapacity(shardsPerInterval);
            for (int i = 0; i < shardsPerInterval; i++) {
                specs.add(new HadoopyShardSpec(new HashBasedNumberedShardSpec(i, shardsPerInterval,
                        config.getPartitionsSpec().getPartitionDimensions(),
                        HadoopDruidIndexerConfig.JSON_MAPPER), shardCount++));
            }
            shardSpecs.put(bucket.getMillis(), specs);
            log.info("DateTime[%s], spec[%s]", bucket, specs);
        }
        config.setShardSpecs(shardSpecs);

        return true;
    }
}
From source file: org.apache.druid.indexer.HadoopDruidIndexerConfig.java
License: Apache License
public Optional<Iterable<Bucket>> getAllBuckets() {
    Optional<Set<Interval>> intervals = getSegmentGranularIntervals();
    if (intervals.isPresent()) {
        return Optional.of((Iterable<Bucket>) FunctionalIterable.create(intervals.get())
                .transformCat(new Function<Interval, Iterable<Bucket>>() {
                    @Override
                    public Iterable<Bucket> apply(Interval input) {
                        final DateTime bucketTime = input.getStart();
                        final List<HadoopyShardSpec> specs = schema.getTuningConfig().getShardSpecs()
                                .get(bucketTime.getMillis());
                        if (specs == null) {
                            return ImmutableList.of();
                        }

                        return FunctionalIterable.create(specs)
                                .transform(new Function<HadoopyShardSpec, Bucket>() {
                                    int i = 0;

                                    @Override
                                    public Bucket apply(HadoopyShardSpec input) {
                                        return new Bucket(input.getShardNum(), bucketTime, i++);
                                    }
                                });
                    }
                }));
    } else {
        return Optional.absent();
    }
}
From source file: org.apache.druid.indexer.HadoopDruidIndexerConfig.java
License: Apache License
public Path makeSegmentPartitionInfoPath(Interval bucketInterval) {
    return new Path(StringUtils.format("%s/%s_%s/partitions.json", makeIntermediatePath(),
            ISODateTimeFormat.basicDateTime().print(bucketInterval.getStart()),
            ISODateTimeFormat.basicDateTime().print(bucketInterval.getEnd())));
}
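As a rough illustration (not part of the Druid source), the basicDateTime formatter above renders getStart() and getEnd() as compact timestamps, so the directory segment of the partition path comes out looking something like 20200101T000000.000Z_20200102T000000.000Z. A minimal sketch with an assumed interval:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.format.ISODateTimeFormat;

public class PartitionPathSketch {
    public static void main(String[] args) {
        // Illustrative interval; in the job, intervals come from the indexer config.
        Interval bucketInterval = new Interval(
                new DateTime(2020, 1, 1, 0, 0, DateTimeZone.UTC),
                new DateTime(2020, 1, 2, 0, 0, DateTimeZone.UTC));

        String dirName = ISODateTimeFormat.basicDateTime().print(bucketInterval.getStart())
                + "_"
                + ISODateTimeFormat.basicDateTime().print(bucketInterval.getEnd());

        // Prints something like: 20200101T000000.000Z_20200102T000000.000Z
        System.out.println(dirName);
    }
}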
From source file: org.apache.druid.indexer.path.GranularityPathSpec.java
License: Apache License
@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    final Set<Interval> intervals = new TreeSet<>(Comparators.intervalsByStartThenEnd());
    for (Interval inputInterval : config.getInputIntervals()) {
        for (Interval interval : dataGranularity.getIterable(inputInterval)) {
            intervals.add(trim(inputInterval, interval));
        }
    }

    Path betaInput = new Path(inputPath);
    FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
    Set<String> paths = new TreeSet<>();
    Pattern fileMatcher = Pattern.compile(filePattern);

    DateTimeFormatter customFormatter = null;
    if (pathFormat != null) {
        customFormatter = DateTimeFormat.forPattern(pathFormat);
    }

    for (Interval interval : intervals) {
        DateTime t = interval.getStart();
        String intervalPath;
        if (customFormatter != null) {
            intervalPath = customFormatter.print(t);
        } else {
            intervalPath = dataGranularity.toPath(t);
        }

        Path granularPath = new Path(betaInput, intervalPath);
        log.info("Checking path[%s]", granularPath);
        for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
            final Path filePath = status.getPath();
            if (fileMatcher.matcher(filePath.toString()).matches()) {
                paths.add(filePath.toString());
            }
        }
    }

    log.info("Appending path %s", paths);
    StaticPathSpec.addToMultipleInputs(config, job, paths, inputFormat);

    return job;
}
From source file: org.apache.druid.indexing.common.task.AbstractTask.java
License: Apache License
static String getOrMakeId(String id, final String typeName, String dataSource, @Nullable Interval interval) {
    if (id != null) {
        return id;
    }

    final List<Object> objects = new ArrayList<>();
    objects.add(typeName);
    objects.add(dataSource);
    if (interval != null) {
        objects.add(interval.getStart());
        objects.add(interval.getEnd());
    }
    objects.add(DateTimes.nowUtc().toString());

    return joinId(objects);
}
From source file: org.apache.druid.indexing.common.task.batch.parallel.PartialSegmentMergeTask.java
License: Apache License
private Map<Interval, Int2ObjectMap<List<File>>> fetchSegmentFiles(TaskToolbox toolbox,
        Map<Interval, Int2ObjectMap<List<PartitionLocation>>> intervalToPartitions) throws IOException {
    final File tempDir = toolbox.getFirehoseTemporaryDir();
    FileUtils.deleteQuietly(tempDir);
    FileUtils.forceMkdir(tempDir);

    final Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles = new HashMap<>();
    // Fetch partition files
    for (Entry<Interval, Int2ObjectMap<List<PartitionLocation>>> entryPerInterval : intervalToPartitions
            .entrySet()) {
        final Interval interval = entryPerInterval.getKey();
        for (Int2ObjectMap.Entry<List<PartitionLocation>> entryPerPartitionId : entryPerInterval.getValue()
                .int2ObjectEntrySet()) {
            final int partitionId = entryPerPartitionId.getIntKey();
            final File partitionDir = FileUtils.getFile(tempDir, interval.getStart().toString(),
                    interval.getEnd().toString(), Integer.toString(partitionId));
            FileUtils.forceMkdir(partitionDir);
            for (PartitionLocation location : entryPerPartitionId.getValue()) {
                final File zippedFile = fetchSegmentFile(partitionDir, location);
                try {
                    final File unzippedDir = new File(partitionDir,
                            StringUtils.format("unzipped_%s", location.getSubTaskId()));
                    FileUtils.forceMkdir(unzippedDir);
                    CompressionUtils.unzip(zippedFile, unzippedDir);
                    intervalToUnzippedFiles.computeIfAbsent(interval, k -> new Int2ObjectOpenHashMap<>())
                            .computeIfAbsent(partitionId, k -> new ArrayList<>()).add(unzippedDir);
                } finally {
                    if (!zippedFile.delete()) {
                        LOG.warn("Failed to delete temp file[%s]", zippedFile);
                    }
                }
            }
        }
    }
    return intervalToUnzippedFiles;
}
From source file: org.apache.druid.indexing.common.task.ConvertSegmentTask.java
License: Apache License
protected static String makeId(String dataSource, Interval interval) {
    Preconditions.checkNotNull(dataSource, "dataSource");
    Preconditions.checkNotNull(interval, "interval");
    return joinId(TYPE, dataSource, interval.getStart(), interval.getEnd(), DateTimes.nowUtc());
}
From source file: org.apache.druid.indexing.common.task.SameIntervalMergeTask.java
License: Apache License
public static String makeId(String id, final String typeName, String dataSource, Interval interval) {
    return id != null ? id
            : joinId(typeName, dataSource, interval.getStart(), interval.getEnd(),
                    DateTimes.nowUtc().toString());
}
From source file: org.apache.druid.indexing.overlord.IndexerMetadataStorageAdapter.java
License: Apache License
public int deletePendingSegments(String dataSource, Interval deleteInterval) {
    // Check that the given interval does not overlap the interval (minCreatedDateOfActiveTasks, MAX)
    final Optional<DateTime> minCreatedDateOfActiveTasks = taskStorageQueryAdapter.getActiveTaskInfo(dataSource)
            .stream().map(TaskInfo::getCreatedTime).min(Comparator.naturalOrder());
    final Interval activeTaskInterval = new Interval(minCreatedDateOfActiveTasks.orElse(DateTimes.MAX),
            DateTimes.MAX);

    Preconditions.checkArgument(!deleteInterval.overlaps(activeTaskInterval),
            "Cannot delete pendingSegments because there is at least one active task created at %s",
            activeTaskInterval.getStart());

    return indexerMetadataStorageCoordinator.deletePendingSegments(dataSource, deleteInterval);
}