Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.druid.indexing.common.task; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import org.apache.druid.indexer.TaskStatus; import org.apache.druid.indexing.common.TaskToolbox; import org.apache.druid.indexing.common.actions.SegmentInsertAction; import org.apache.druid.indexing.common.actions.SegmentListUsedAction; import org.apache.druid.indexing.common.actions.TaskActionClient; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.guava.FunctionalIterable; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; import org.apache.druid.segment.IndexIO; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.loading.SegmentLoadingException; import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; import javax.annotation.Nullable; import java.io.File; import java.io.IOException; import java.util.Collections; import java.util.List; import java.util.Map; /** * This task takes a segment and attempts to reindex it in the latest version with the specified indexSpec. * <p/> * Only datasource must be specified. `indexSpec` and `force` are highly suggested but optional. The rest get * auto-configured and should only be modified with great care */ public class ConvertSegmentTask extends AbstractFixedIntervalTask { private static final String TYPE = "convert_segment"; private static final Integer CURR_VERSION_INTEGER = IndexIO.CURRENT_VERSION_ID; private static final Logger log = new Logger(ConvertSegmentTask.class); /** * Create a segment converter task to convert a segment to the most recent version including the specified indexSpec * * @param dataSource The datasource to which this update should be applied * @param interval The interval in the datasource which to apply the update to * @param indexSpec The IndexSpec to use in the updated segments * @param force Force an update, even if the task thinks it doesn't need to update. * @param validate Validate the new segment compared to the old segment on a row by row basis * * @return A SegmentConverterTask for the datasource's interval with the indexSpec specified. */ public static ConvertSegmentTask create(String dataSource, Interval interval, IndexSpec indexSpec, boolean force, boolean validate, @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, Map<String, Object> context) { final String id = makeId(dataSource, interval); return new ConvertSegmentTask(id, dataSource, interval, null, indexSpec, force, validate, segmentWriteOutMediumFactory, context); } /** * Create a task to update the segment specified to the most recent binary version with the specified indexSpec * * @param segment The segment to which this update should be applied * @param indexSpec The IndexSpec to use in the updated segments * @param force Force an update, even if the task thinks it doesn't need to update. * @param validate Validate the new segment compared to the old segment on a row by row basis * * @return A SegmentConverterTask for the segment with the indexSpec specified. */ public static ConvertSegmentTask create(DataSegment segment, IndexSpec indexSpec, boolean force, boolean validate, @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, Map<String, Object> context) { final Interval interval = segment.getInterval(); final String dataSource = segment.getDataSource(); final String id = makeId(dataSource, interval); return new ConvertSegmentTask(id, dataSource, interval, segment, indexSpec, force, validate, segmentWriteOutMediumFactory, context); } protected static String makeId(String dataSource, Interval interval) { Preconditions.checkNotNull(dataSource, "dataSource"); Preconditions.checkNotNull(interval, "interval"); return joinId(TYPE, dataSource, interval.getStart(), interval.getEnd(), DateTimes.nowUtc()); } @JsonCreator private static ConvertSegmentTask createFromJson(@JsonProperty("id") String id, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, @JsonProperty("segment") DataSegment segment, @JsonProperty("indexSpec") IndexSpec indexSpec, @JsonProperty("force") Boolean force, @JsonProperty("validate") Boolean validate, @JsonProperty("context") Map<String, Object> context, @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) { final boolean isForce = force == null ? false : force; final boolean isValidate = validate == null ? true : validate; if (id == null) { if (segment == null) { return create(dataSource, interval, indexSpec, isForce, isValidate, segmentWriteOutMediumFactory, context); } else { return create(segment, indexSpec, isForce, isValidate, segmentWriteOutMediumFactory, context); } } return new ConvertSegmentTask(id, dataSource, interval, segment, indexSpec, isForce, isValidate, segmentWriteOutMediumFactory, context); } @JsonIgnore private final DataSegment segment; private final IndexSpec indexSpec; private final boolean force; private final boolean validate; @Nullable private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; ConvertSegmentTask(String id, String dataSource, Interval interval, DataSegment segment, IndexSpec indexSpec, boolean force, boolean validate, @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, Map<String, Object> context) { super(id, dataSource, interval, context); this.segment = segment; this.indexSpec = indexSpec == null ? new IndexSpec() : indexSpec; this.force = force; this.validate = validate; this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; } @JsonProperty public boolean isForce() { return force; } @JsonProperty public boolean isValidate() { return validate; } @JsonProperty public IndexSpec getIndexSpec() { return indexSpec; } @Override public String getType() { return TYPE; } @JsonProperty public DataSegment getSegment() { return segment; } @JsonProperty @Nullable public SegmentWriteOutMediumFactory getSegmentWriteOutMediumFactory() { return segmentWriteOutMediumFactory; } @Override public TaskStatus run(TaskToolbox toolbox) throws Exception { final Iterable<DataSegment> segmentsToUpdate; if (segment == null) { final List<DataSegment> segments = toolbox.getTaskActionClient() .submit(new SegmentListUsedAction(getDataSource(), getInterval(), null)); segmentsToUpdate = FunctionalIterable.create(segments).filter(new Predicate<DataSegment>() { @Override public boolean apply(DataSegment segment) { final Integer segmentVersion = segment.getBinaryVersion(); if (!CURR_VERSION_INTEGER.equals(segmentVersion)) { return true; } else if (force) { log.info("Segment[%s] already at version[%s], forcing conversion", segment.getIdentifier(), segmentVersion); return true; } else { log.info("Skipping[%s], already version[%s]", segment.getIdentifier(), segmentVersion); return false; } } }); } else { log.info("I'm in a subless mood."); segmentsToUpdate = Collections.singleton(segment); } // Vestigial from a past time when this task spawned subtasks. for (final Task subTask : generateSubTasks(getGroupId(), segmentsToUpdate, indexSpec, force, validate, getContext())) { final TaskStatus status = subTask.run(toolbox); if (!status.isSuccess()) { return TaskStatus.fromCode(getId(), status.getStatusCode()); } } return success(); } protected Iterable<Task> generateSubTasks(final String groupId, final Iterable<DataSegment> segments, final IndexSpec indexSpec, final boolean force, final boolean validate, final Map<String, Object> context) { return Iterables.transform(segments, new Function<DataSegment, Task>() { @Override public Task apply(DataSegment input) { return new SubTask(groupId, input, indexSpec, force, validate, segmentWriteOutMediumFactory, context); } }); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } ConvertSegmentTask that = (ConvertSegmentTask) o; if (segment != null ? !segment.equals(that.segment) : that.segment != null) { return false; } return super.equals(o); } public static class SubTask extends AbstractFixedIntervalTask { @JsonIgnore private final DataSegment segment; private final IndexSpec indexSpec; private final boolean force; private final boolean validate; @Nullable private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; @JsonCreator public SubTask(@JsonProperty("groupId") String groupId, @JsonProperty("segment") DataSegment segment, @JsonProperty("indexSpec") IndexSpec indexSpec, @JsonProperty("force") Boolean force, @JsonProperty("validate") Boolean validate, @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, @JsonProperty("context") Map<String, Object> context) { super(joinId(groupId, "sub", segment.getInterval().getStart(), segment.getInterval().getEnd(), segment.getShardSpec().getPartitionNum()), groupId, segment.getDataSource(), segment.getInterval(), context); this.segment = segment; this.indexSpec = indexSpec == null ? new IndexSpec() : indexSpec; this.force = force == null ? false : force; this.validate = validate == null ? true : validate; this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; } @JsonProperty public boolean isValidate() { return validate; } @JsonProperty public boolean isForce() { return force; } @JsonProperty public DataSegment getSegment() { return segment; } @Override public String getType() { return "version_converter_sub"; } @Override public TaskStatus run(TaskToolbox toolbox) throws Exception { log.info("Subs are good! Italian BMT and Meatball are probably my favorite."); try { convertSegment(toolbox); } catch (Exception e) { log.error(e, "Conversion failed."); throw e; } return success(); } private void convertSegment(TaskToolbox toolbox) throws SegmentLoadingException, IOException { log.info("Converting segment[%s]", segment); final TaskActionClient actionClient = toolbox.getTaskActionClient(); final List<DataSegment> currentSegments = actionClient .submit(new SegmentListUsedAction(segment.getDataSource(), segment.getInterval(), null)); for (DataSegment currentSegment : currentSegments) { final String version = currentSegment.getVersion(); final Integer binaryVersion = currentSegment.getBinaryVersion(); if (!force && (version.startsWith(segment.getVersion()) && CURR_VERSION_INTEGER.equals(binaryVersion))) { log.info("Skipping already updated segment[%s].", segment); return; } } final Map<DataSegment, File> localSegments = toolbox.fetchSegments(Collections.singletonList(segment)); final File location = localSegments.get(segment); final File outLocation = new File(location, "v9_out"); IndexIO indexIO = toolbox.getIndexIO(); if (indexIO.convertSegment(location, outLocation, indexSpec, force, validate, segmentWriteOutMediumFactory)) { final int outVersion = IndexIO.getVersionFromDir(outLocation); // Appending to the version makes a new version that inherits most comparability parameters of the original // version, but is "newer" than said original version. DataSegment updatedSegment = segment .withVersion(StringUtils.format("%s_v%s", segment.getVersion(), outVersion)); updatedSegment = toolbox.getSegmentPusher().push(outLocation, updatedSegment, false); actionClient.submit(new SegmentInsertAction(Sets.newHashSet(updatedSegment))); } else { log.info("Conversion failed."); } } } }