List of usage examples for org.joda.time Interval getStart
public DateTime getStart()
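getStart() returns the inclusive start instant of the interval as a DateTime. Before the project-specific listings below, here is a minimal, self-contained sketch; the interval bounds and class name are illustrative, not taken from any of the sources listed:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class IntervalGetStartExample {
    public static void main(String[] args) {
        // Illustrative interval: the whole of 2015 in UTC.
        Interval interval = new Interval(new DateTime(2015, 1, 1, 0, 0, DateTimeZone.UTC),
                new DateTime(2016, 1, 1, 0, 0, DateTimeZone.UTC));

        // getStart() returns the inclusive start instant as a DateTime.
        DateTime start = interval.getStart();
        System.out.println(start); // 2015-01-01T00:00:00.000Z
    }
}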
From source file:io.druid.indexer.HadoopDruidIndexerConfig.java
License:Apache License
public Path makeSegmentPartitionInfoPath(Interval bucketInterval) {
    return new Path(String.format("%s/%s_%s/partitions.json", makeIntermediatePath(),
            ISODateTimeFormat.basicDateTime().print(bucketInterval.getStart()),
            ISODateTimeFormat.basicDateTime().print(bucketInterval.getEnd())));
}
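The method above prints the interval boundaries with ISODateTimeFormat.basicDateTime() to build a partition path. As a rough illustration of what that formatter produces, here is a small sketch; the interval and printed output are illustrative, not part of the Druid source:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.format.ISODateTimeFormat;

public class BasicDateTimePathExample {
    public static void main(String[] args) {
        Interval bucketInterval = new Interval(new DateTime(2015, 6, 1, 0, 0, DateTimeZone.UTC),
                new DateTime(2015, 6, 2, 0, 0, DateTimeZone.UTC));

        // basicDateTime() is the compact ISO form: yyyyMMdd'T'HHmmss.SSSZ.
        String start = ISODateTimeFormat.basicDateTime().print(bucketInterval.getStart());
        String end = ISODateTimeFormat.basicDateTime().print(bucketInterval.getEnd());

        // e.g. "20150601T000000.000Z_20150602T000000.000Z/partitions.json"
        System.out.println(String.format("%s_%s/partitions.json", start, end));
    }
}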
From source file:io.druid.indexer.HadoopDruidIndexerJob.java
License:Open Source License
@Override
public boolean run() {
    List<Jobby> jobs = Lists.newArrayList();
    ensurePaths();

    if (config.partitionByDimension()) {
        jobs.add(new DeterminePartitionsJob(config));
    } else {
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();
            final HadoopyShardSpec spec = new HadoopyShardSpec(new NoneShardSpec(), shardCount++);
            shardSpecs.put(bucket, Lists.newArrayList(spec));
            log.info("DateTime[%s], spec[%s]", bucket, spec);
        }
        config.setShardSpecs(shardSpecs);
    }

    indexJob = new IndexGeneratorJob(config);
    jobs.add(indexJob);

    if (dbUpdaterJob != null) {
        jobs.add(dbUpdaterJob);
    } else {
        log.info("No updaterJobSpec set, not uploading to database");
    }

    String failedMessage = null;
    for (Jobby job : jobs) {
        if (failedMessage == null) {
            if (!job.run()) {
                failedMessage = String.format("Job[%s] failed!", job.getClass());
            }
        }
    }

    if (failedMessage == null) {
        publishedSegments = IndexGeneratorJob.getPublishedSegments(config);
    }

    if (!config.isLeaveIntermediate()) {
        if (failedMessage == null || config.isCleanupOnFailure()) {
            Path workingPath = config.makeIntermediatePath();
            log.info("Deleting path[%s]", workingPath);
            try {
                workingPath.getFileSystem(new Configuration()).delete(workingPath, true);
            } catch (IOException e) {
                log.error(e, "Failed to cleanup path[%s]", workingPath);
            }
        }
    }

    if (failedMessage != null) {
        throw new ISE(failedMessage);
    }

    return true;
}
From source file:io.druid.indexer.JobHelper.java
License:Apache License
public static Path makeSegmentOutputPath(Path basePath, FileSystem fileSystem, String dataSource, String version,
        Interval interval, int partitionNum) {
    Path outputPath = new Path(prependFSIfNullScheme(fileSystem, basePath), "./" + dataSource);
    if ("hdfs".equals(fileSystem.getScheme())) {
        outputPath = new Path(outputPath,
                String.format("./%s_%s", interval.getStart().toString(ISODateTimeFormat.basicDateTime()),
                        interval.getEnd().toString(ISODateTimeFormat.basicDateTime())));
        outputPath = new Path(outputPath, version.replace(":", "_"));
    } else {
        outputPath = new Path(outputPath,
                String.format("./%s_%s", interval.getStart().toString(), interval.getEnd().toString()));
        outputPath = new Path(outputPath, String.format("./%s", version));
    }
    outputPath = new Path(outputPath, Integer.toString(partitionNum));
    return outputPath;
}
From source file:io.druid.indexer.path.GranularityPathSpec.java
License:Apache License
@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
    Optional<Set<Interval>> optionalIntervals = config.getSegmentGranularIntervals();
    if (optionalIntervals.isPresent()) {
        for (Interval segmentInterval : optionalIntervals.get()) {
            for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
                intervals.add(dataInterval);
            }
        }
    }

    Path betaInput = new Path(inputPath);
    FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
    Set<String> paths = Sets.newTreeSet();
    Pattern fileMatcher = Pattern.compile(filePattern);

    DateTimeFormatter customFormatter = null;
    if (pathFormat != null) {
        customFormatter = DateTimeFormat.forPattern(pathFormat);
    }

    for (Interval interval : intervals) {
        DateTime t = interval.getStart();
        String intervalPath = null;
        if (customFormatter != null) {
            intervalPath = customFormatter.print(t);
        } else {
            intervalPath = dataGranularity.toPath(t);
        }
        Path granularPath = new Path(betaInput, intervalPath);
        log.info("Checking path[%s]", granularPath);
        for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
            final Path filePath = status.getPath();
            if (fileMatcher.matcher(filePath.toString()).matches()) {
                paths.add(filePath.toString());
            }
        }
    }

    for (String path : paths) {
        log.info("Appending path[%s]", path);
        StaticPathSpec.addToMultipleInputs(config, job, path, inputFormat);
    }

    return job;
}
From source file:io.druid.indexing.common.task.AbstractTask.java
License:Apache License
public static String makeId(String id, final String typeName, String dataSource, Interval interval) {
    return id != null ? id
            : joinId(typeName, dataSource, interval.getStart(), interval.getEnd(), new DateTime().toString());
}
From source file:io.druid.indexing.common.task.ConvertSegmentTask.java
License:Apache License
protected static String makeId(String dataSource, Interval interval) {
    Preconditions.checkNotNull(dataSource, "dataSource");
    Preconditions.checkNotNull(interval, "interval");
    return joinId(TYPE, dataSource, interval.getStart(), interval.getEnd(), new DateTime());
}
From source file:io.druid.indexing.common.task.IndexTask.java
License:Apache License
private DataSegment generateSegment(final TaskToolbox toolbox, final DataSchema schema, final ShardSpec shardSpec,
        final Interval interval, final String version) throws IOException {
    // Set up temporary directory.
    final File tmpDir = new File(toolbox.getTaskWorkDir(), String.format("%s_%s_%s_%s_%s", this.getDataSource(),
            interval.getStart(), interval.getEnd(), version, shardSpec.getPartitionNum()));

    final FirehoseFactory firehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory();
    final int rowFlushBoundary = ingestionSchema.getTuningConfig().getRowFlushBoundary();

    // We need to track published segments.
    final List<DataSegment> pushedSegments = new CopyOnWriteArrayList<DataSegment>();
    final DataSegmentPusher wrappedDataSegmentPusher = new DataSegmentPusher() {
        @Override
        public String getPathForHadoop(String dataSource) {
            return toolbox.getSegmentPusher().getPathForHadoop(dataSource);
        }

        @Override
        public DataSegment push(File file, DataSegment segment) throws IOException {
            final DataSegment pushedSegment = toolbox.getSegmentPusher().push(file, segment);
            pushedSegments.add(pushedSegment);
            return pushedSegment;
        }
    };

    // rowFlushBoundary for this job
    final int myRowFlushBoundary = rowFlushBoundary > 0 ? rowFlushBoundary
            : toolbox.getConfig().getDefaultRowFlushBoundary();

    // Create firehose + plumber
    final FireDepartmentMetrics metrics = new FireDepartmentMetrics();
    final Firehose firehose = firehoseFactory.connect(ingestionSchema.getDataSchema().getParser());
    final Supplier<Committer> committerSupplier = Committers.supplierFromFirehose(firehose);
    final Plumber plumber = new YeOldePlumberSchool(interval, version, wrappedDataSegmentPusher, tmpDir)
            .findPlumber(schema,
                    convertTuningConfig(shardSpec, myRowFlushBoundary,
                            ingestionSchema.getTuningConfig().getIndexSpec()),
                    metrics);

    final QueryGranularity rollupGran = ingestionSchema.getDataSchema().getGranularitySpec()
            .getQueryGranularity();

    try {
        plumber.startJob();

        while (firehose.hasMore()) {
            final InputRow inputRow = firehose.nextRow();

            if (shouldIndex(shardSpec, interval, inputRow, rollupGran)) {
                int numRows = plumber.add(inputRow, committerSupplier);
                if (numRows == -1) {
                    throw new ISE(String.format("Was expecting non-null sink for timestamp[%s]",
                            new DateTime(inputRow.getTimestampFromEpoch())));
                }
                metrics.incrementProcessed();
            } else {
                metrics.incrementThrownAway();
            }
        }
    } finally {
        firehose.close();
    }

    plumber.persist(committerSupplier.get());

    try {
        plumber.finishJob();
    } finally {
        log.info(
                "Task[%s] interval[%s] partition[%d] took in %,d rows (%,d processed, %,d unparseable, %,d thrown away)"
                        + " and output %,d rows",
                getId(), interval, shardSpec.getPartitionNum(),
                metrics.processed() + metrics.unparseable() + metrics.thrownAway(), metrics.processed(),
                metrics.unparseable(), metrics.thrownAway(), metrics.rowOutput());
    }

    // We expect a single segment to have been created.
    return Iterables.getOnlyElement(pushedSegments);
}
From source file:io.druid.indexing.common.task.TaskUtils.java
License:Apache License
public static String makeId(String id, final String typeName, String dataSource, Interval interval) {
    return id != null ? id
            : String.format("%s_%s_%s_%s_%s", typeName, dataSource, interval.getStart(), interval.getEnd(),
                    new DateTime().toString());
}
From source file:io.druid.indexing.common.task.VersionConverterTask.java
License:Apache License
private static String makeId(String dataSource, Interval interval) {
    Preconditions.checkNotNull(dataSource, "dataSource");
    Preconditions.checkNotNull(interval, "interval");
    return joinId(TYPE, dataSource, interval.getStart(), interval.getEnd(), new DateTime());
}
From source file:io.druid.indexing.coordinator.IndexerDBCoordinator.java
License:Open Source License
public List<DataSegment> getUnusedSegmentsForInterval(final String dataSource, final Interval interval) {
    List<DataSegment> matchingSegments = dbi.withHandle(new HandleCallback<List<DataSegment>>() {
        @Override
        public List<DataSegment> withHandle(Handle handle) throws IOException {
            return handle
                    .createQuery(String.format(
                            "SELECT payload FROM %s WHERE dataSource = :dataSource and start >= :start and end <= :end and used = 0",
                            dbTables.getSegmentsTable()))
                    .bind("dataSource", dataSource)
                    .bind("start", interval.getStart().toString())
                    .bind("end", interval.getEnd().toString())
                    .fold(Lists.<DataSegment>newArrayList(),
                            new Folder3<List<DataSegment>, Map<String, Object>>() {
                                @Override
                                public List<DataSegment> fold(List<DataSegment> accumulator,
                                        Map<String, Object> stringObjectMap, FoldController foldController,
                                        StatementContext statementContext) throws SQLException {
                                    try {
                                        DataSegment segment = jsonMapper.readValue(
                                                (String) stringObjectMap.get("payload"), DataSegment.class);
                                        accumulator.add(segment);
                                        return accumulator;
                                    } catch (Exception e) {
                                        throw Throwables.propagate(e);
                                    }
                                }
                            });
        }
    });

    log.info("Found %,d segments for %s for interval %s.", matchingSegments.size(), dataSource, interval);
    return matchingSegments;
}