// Copyright 2017 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package google.registry.batch;

import com.google.appengine.api.datastore.BaseDatastoreService;
import com.google.appengine.api.datastore.Key;
import com.google.appengine.tools.mapreduce.impl.shardedjob.ShardedJobServiceFactory;
import com.google.appengine.tools.pipeline.NoSuchObjectException;
import com.google.appengine.tools.pipeline.impl.PipelineManager;
import com.google.appengine.tools.pipeline.impl.model.JobRecord;
import com.google.appengine.tools.pipeline.util.Pair;
import com.google.auto.value.AutoValue;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableSet;
import java.util.HashSet;
import java.util.Set;
import javax.annotation.Nullable;
import javax.inject.Inject;
import org.joda.time.DateTime;

/** Utilities used in mapreduce datastore entity cleanup. */
class MapreduceEntityCleanupUtil {

  /** Number of jobs to fetch at a time using PipelineManager.queryRootPipelines. */
  protected static final int MAX_NUMBER_OF_JOBS_PER_SEARCH = 100;

  private static final ImmutableSet<String> JOB_PREFIXES =
      ImmutableSet.of("", "map-", "sort-", "merge-", "reduce-");

  @Inject
  MapreduceEntityCleanupUtil() {}

  /** Return value from {@link #findEligibleJobsByJobName} */
  @AutoValue
  abstract static class EligibleJobResults {
    static EligibleJobResults create(ImmutableSet<String> jobs, Optional<String> cursor) {
      return new AutoValue_MapreduceEntityCleanupUtil_EligibleJobResults(jobs, cursor);
    }

    abstract ImmutableSet<String> eligibleJobs();

    abstract Optional<String> cursor();
  }

  /**
   * Returns the maximum number of jobs to return per search request.
   *
   * <p>This method is present to allow overriding by test subclasses.
   */
  protected int getMaxNumberOfJobsPerSearch() {
    return MAX_NUMBER_OF_JOBS_PER_SEARCH;
  }

  /**
   * Finds the requested number of root pipeline jobs eligible for deletion.
   *
   * <p>Loops through the root jobs returned by the pipeline API, searching for those with a
   * matching name in an appropriate state, and older than the specified cutoff date.
   *
   * <p>Regardless of the setting of maxJobs, a maximum of {@link #MAX_NUMBER_OF_JOBS_PER_SEARCH}
   * will be returned. If there might be more jobs available to find, a cursor will be returned,
   * which can be used in a subsequent call to {@link #findEligibleJobsByJobName} to continue the
   * search.
   *
   * @param jobName the desired job name; if null, all jobs are considered to match
   * @param cutoffDate eligible jobs must have both startTime and endTime before cutoffDate; if
   *     startTime and/or endTime are null, they are considered to be old enough -- this is
   *     because many jobs do lack at least one of these, and we don't want such jobs to stick
   *     around forever and never get deleted
   * @param maxJobs the maximum number of jobs to return; if absent, return all eligible jobs (see
   *     note above about {@link #MAX_NUMBER_OF_JOBS_PER_SEARCH})
   * @param ignoreState if true, jobs will be included regardless of their state
   * @param cursor if present, a cursor returned from a previous call to the method; the search
   *     will be picked up where it left off
   * @return job IDs of the eligible jobs
   */
  EligibleJobResults findEligibleJobsByJobName(
      @Nullable String jobName,
      DateTime cutoffDate,
      Optional<Integer> maxJobs,
      boolean ignoreState,
      Optional<String> cursor) {
    if (maxJobs.isPresent() && (maxJobs.get() <= 0)) {
      return EligibleJobResults.create(ImmutableSet.<String>of(), Optional.<String>absent());
    }
    Set<String> eligibleJobs = new HashSet<>();
    Pair<? extends Iterable<JobRecord>, String> pair =
        PipelineManager.queryRootPipelines(
            jobName, cursor.orNull(), getMaxNumberOfJobsPerSearch());
    for (JobRecord jobRecord : pair.getFirst()) {
      // A job is eligible if its start and end times (when present) both predate the cutoff, and
      // it is in a terminal state (unless ignoreState is set).
      if (((jobRecord.getStartTime() == null)
              || jobRecord.getStartTime().before(cutoffDate.toDate()))
          && ((jobRecord.getEndTime() == null)
              || jobRecord.getEndTime().before(cutoffDate.toDate()))
          && (ignoreState
              || (jobRecord.getState() == JobRecord.State.FINALIZED)
              || (jobRecord.getState() == JobRecord.State.STOPPED))) {
        eligibleJobs.add(jobRecord.getRootJobKey().getName());
        if (maxJobs.isPresent() && (eligibleJobs.size() >= maxJobs.get())) {
          return EligibleJobResults.create(
              ImmutableSet.copyOf(eligibleJobs), Optional.<String>absent());
        }
      }
    }
    return EligibleJobResults.create(
        ImmutableSet.copyOf(eligibleJobs), Optional.fromNullable(pair.getSecond()));
  }
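  // Usage sketch (hypothetical caller, not part of the original class): drain all eligible jobs
  // for a mapreduce named "myJobName" by following the pagination cursor until it runs out. The
  // enclosing `util` instance and `cutoffDate` are assumptions for illustration.
  //
  //   Optional<String> cursor = Optional.absent();
  //   Set<String> allEligibleJobs = new HashSet<>();
  //   do {
  //     EligibleJobResults results = util.findEligibleJobsByJobName(
  //         "myJobName", cutoffDate, Optional.<Integer>absent(), false, cursor);
  //     allEligibleJobs.addAll(results.eligibleJobs());
  //     cursor = results.cursor();
  //   } while (cursor.isPresent());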
  /**
   * Requests asynchronous deletion of entities associated with the specified job ID.
   *
   * <p>The mapreduce API is used to delete the MR-* entities, and the pipeline API is used to
   * delete the main job records. No attempt is made to check whether the deletion succeeds, only
   * whether it appeared to be a valid deletion request up front.
   *
   * @param datastore the datastore service, which can be either synchronous or asynchronous,
   *     since the only interaction with the database is via prepared queries
   * @param jobId the root pipeline job ID to be deleted; if the jobId does not exist, the
   *     deletion will appear to succeed, because the underlying library routines do not complain
   * @param force passed to the pipeline API, indicating whether jobs should be forcibly deleted
   *     even if they are not in a completed state; however, there is no force flag on the
   *     mapreduce API call, meaning that running jobs cannot be deleted
   * @return an error string, or absent if no error was detected
   */
  Optional<String> deleteJobAsync(BaseDatastoreService datastore, String jobId, boolean force) {
    // Try to delete the MR-* entities. This is always done asynchronously. A return value of
    // false indicates that the job is in RUNNING state, and nothing has been done.
    // TODO(mountford) check the state of all sharded jobs before deleting any
    for (String mrShardedJobId : getPossibleIdsForPipelineJob(datastore, jobId)) {
      if (!ShardedJobServiceFactory.getShardedJobService().cleanupJob(mrShardedJobId)) {
        return Optional.of(String.format("Skipping; job %s is in running state", mrShardedJobId));
      }
    }

    // If we are successful (meaning, MR-* entity deletion has been kicked off asynchronously),
    // delete the pipeline-* entities as well.
    try {
      PipelineManager.deletePipelineRecords(jobId, force, true /* async */);
      return Optional.absent();
    } catch (NoSuchObjectException ex) {
      return Optional.of("No such pipeline job");
    } catch (IllegalStateException ex) {
      return Optional.of("Job is not in FINALIZED or STOPPED state");
    }
  }

  /**
   * Returns the possible MR-ShardedJob IDs associated with the specified pipeline job and any
   * child jobs.
   *
   * @param datastore the datastore service, which can be either synchronous or asynchronous,
   *     since the only interaction with the database is via prepared queries
   * @param jobId the pipeline job ID
   * @return the IDs of MR-ShardedJob entities that the Mapreduce library might have created,
   *     depending on which steps of the mapreduce were used
   */
  private ImmutableSet<String> getPossibleIdsForPipelineJob(
      BaseDatastoreService datastore, String jobId) {
    return getPossibleIdsForPipelineJobRecur(datastore, jobId, new HashSet<String>());
  }

  /**
   * Called by {@link #getPossibleIdsForPipelineJob}, and by itself recursively.
   *
   * @param datastore the datastore service, which can be either synchronous or asynchronous,
   *     since the only interaction with the database is via prepared queries
   * @param jobId the pipeline job ID
   * @param handledJobIds the set of job IDs which have been handled so far; this is a sanity
   *     check to prevent an infinite loop if, for some crazy reason, the job dependency graph is
   *     cyclic
   * @return the IDs of MR-ShardedJob entities that the Mapreduce library might have created,
   *     depending on which steps of the mapreduce were used
   */
  private ImmutableSet<String> getPossibleIdsForPipelineJobRecur(
      BaseDatastoreService datastore, String jobId, Set<String> handledJobIds) {
    if (handledJobIds.contains(jobId)) {
      return ImmutableSet.<String>of();
    }
    handledJobIds.add(jobId);

    JobRecord jobRecord;
    try {
      jobRecord = PipelineManager.getJob(jobId);
    } catch (NoSuchObjectException e) {
      return ImmutableSet.<String>of();
    }

    ImmutableSet.Builder<String> idSetBuilder = new ImmutableSet.Builder<>();
    for (String jobPrefix : JOB_PREFIXES) {
      idSetBuilder.add("MR-ShardedJob", jobPrefix + jobId);
    }

    for (Key childKey : jobRecord.getChildKeys()) {
      idSetBuilder.addAll(
          getPossibleIdsForPipelineJobRecur(datastore, childKey.getName(), handledJobIds));
    }
    return idSetBuilder.build();
  }
}
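
// Usage sketch (hypothetical caller, not part of the original class): request deletion of a
// single root pipeline job and surface any error string. DatastoreServiceFactory is the standard
// App Engine datastore entry point; `jobId` is assumed to come from findEligibleJobsByJobName.
//
//   BaseDatastoreService datastore = DatastoreServiceFactory.getDatastoreService();
//   Optional<String> error =
//       new MapreduceEntityCleanupUtil().deleteJobAsync(datastore, jobId, false /* force */);
//   if (error.isPresent()) {
//     // Deletion was not started, e.g. "No such pipeline job"; log the error and retry later.
//   }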