Java tutorial: HoodieActiveTimeline (com.uber.hoodie.common.table.timeline)
/*
 * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.common.table.timeline;

import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.HoodieIOException;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Represents the active timeline for a HoodieDataset. Instants for the last 12 hours (configurable)
 * are in the active timeline and the rest are archived. The active timeline is a special timeline
 * that allows creation of instants on the timeline.
 * <p>
 * The timeline is not automatically reloaded on any mutation operation; clients have to call reload()
 * explicitly, so that they can chain multiple mutations to the timeline and then reload once.
 * <p>
 * This class can be serialized and de-serialized; on de-serialization the FileSystem is re-initialized.
 */
public class HoodieActiveTimeline extends HoodieDefaultTimeline {

  public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss");

  private static final transient Logger log = LogManager.getLogger(HoodieActiveTimeline.class);

  private String metaPath;
  private transient FileSystem fs;

  /**
   * Returns the next commit time in the {@link #COMMIT_FORMATTER} format.
   *
   * @return a new commit time based on the current timestamp
   */
  public static String createNewCommitTime() {
    return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date());
  }

  protected HoodieActiveTimeline(FileSystem fs, String metaPath, String[] includedExtensions) {
    // Scan all the files in the metaPath, include only those with the passed extensions
    // and convert them into HoodieInstants
    try {
      this.instants = Arrays.stream(HoodieTableMetaClient.scanFiles(fs, new Path(metaPath), path -> {
        // Include only the meta files with extensions that need to be included
        String extension = FSUtils.getFileExtension(path.getName());
        return Arrays.stream(includedExtensions).anyMatch(Predicate.isEqual(extension));
      })).sorted(Comparator.comparing(
          // Sort the meta files by the instant time (first part of the file name)
          fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
          // Create HoodieInstants from the FileStatus, which extracts the instant properties
          .map(HoodieInstant::new).collect(Collectors.toList());
      log.info("Loaded instants " + instants);
    } catch (IOException e) {
      throw new HoodieIOException("Failed to scan metadata", e);
    }
    this.fs = fs;
    this.metaPath = metaPath;
    // Multiple casts make this lambda serializable -
    // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
    this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
  }

  public HoodieActiveTimeline(FileSystem fs, String metaPath) {
    this(fs, metaPath,
        new String[] {COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION,
            DELTA_COMMIT_EXTENSION, INFLIGHT_DELTA_COMMIT_EXTENSION,
            COMPACTION_EXTENSION, INFLIGHT_COMPACTION_EXTENSION,
            SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
            CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
  }

  /**
   * For serialization and de-serialization only.
   *
   * @deprecated
   */
  public HoodieActiveTimeline() {
  }

  /**
   * This method is only used when this object is de-serialized in a Spark executor.
   *
   * @deprecated
   */
  private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
    in.defaultReadObject();
    this.fs = FSUtils.getFs();
  }

  /**
   * Get all instants (commits, delta commits, compactions) that produce new data, in the active timeline.
   *
   * @return timeline containing commit, delta commit and compaction actions
   */
  public HoodieTimeline getCommitsAndCompactionsTimeline() {
    return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION));
  }

  /**
   * Get only pure commits (inflight and completed) in the active timeline.
   *
   * @return timeline containing only commit actions
   */
  public HoodieTimeline getCommitTimeline() {
    return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION));
  }

  /**
   * Get only the delta commits (inflight and completed) in the active timeline.
   *
   * @return timeline containing only delta commit actions
   */
  public HoodieTimeline getDeltaCommitTimeline() {
    return new HoodieDefaultTimeline(filterInstantsByAction(DELTA_COMMIT_ACTION),
        (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
  }

  /**
   * Get only the compactions (inflight and completed) in the active timeline.
   *
   * @return timeline containing only compaction actions
   */
  public HoodieTimeline getCompactionTimeline() {
    return new HoodieDefaultTimeline(filterInstantsByAction(COMPACTION_ACTION),
        (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
  }

  /**
   * Get a timeline of a specific set of actions,
   * useful to create a merged timeline of multiple actions.
   *
   * @param actions actions allowed in the timeline
   * @return timeline containing only the given actions
   */
  public HoodieTimeline getTimelineOfActions(Set<String> actions) {
    return new HoodieDefaultTimeline(instants.stream().filter(s -> actions.contains(s.getAction())),
        (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
  }

  /**
   * Get only the cleaner actions (inflight and completed) in the active timeline.
   *
   * @return timeline containing only clean actions
   */
  public HoodieTimeline getCleanerTimeline() {
    return new HoodieDefaultTimeline(filterInstantsByAction(CLEAN_ACTION),
        (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
  }

  /**
   * Get only the rollback actions (inflight and completed) in the active timeline.
   *
   * @return timeline containing only rollback actions
   */
  public HoodieTimeline getRollbackTimeline() {
    return new HoodieDefaultTimeline(filterInstantsByAction(ROLLBACK_ACTION),
        (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
  }

  /**
   * Get only the savepoint actions (inflight and completed) in the active timeline.
   *
   * @return timeline containing only savepoint actions
   */
  public HoodieTimeline getSavePointTimeline() {
    return new HoodieDefaultTimeline(filterInstantsByAction(SAVEPOINT_ACTION),
        (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
  }

  protected Stream<HoodieInstant> filterInstantsByAction(String action) {
    return instants.stream().filter(s -> s.getAction().equals(action));
  }

  public void createInflight(HoodieInstant instant) {
    log.info("Creating a new in-flight instant " + instant);
    // Create the in-flight file
    createFileInMetaPath(instant.getFileName(), Optional.empty());
  }

  public void saveAsComplete(HoodieInstant instant, Optional<byte[]> data) {
    log.info("Marking instant complete " + instant);
    Preconditions.checkArgument(instant.isInflight(),
        "Could not mark an already completed instant as complete again " + instant);
    moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data);
    log.info("Completed " + instant);
  }

  public void revertToInflight(HoodieInstant instant) {
    log.info("Reverting instant to inflight " + instant);
    moveCompleteToInflight(instant, HoodieTimeline.getInflightInstant(instant));
    log.info("Reverted " + instant + " to inflight");
  }

  public void deleteInflight(HoodieInstant instant) {
    log.info("Deleting in-flight " + instant);
    Path inFlightCommitFilePath = new Path(metaPath, instant.getFileName());
    try {
      boolean result = fs.delete(inFlightCommitFilePath, false);
      if (result) {
        log.info("Removed in-flight " + instant);
      } else {
        throw new HoodieIOException("Could not delete in-flight instant " + instant);
      }
    } catch (IOException e) {
      throw new HoodieIOException("Could not remove inflight commit " + inFlightCommitFilePath, e);
    }
  }

  @Override
  public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
    Path detailPath = new Path(metaPath, instant.getFileName());
    return readDataFromPath(detailPath);
  }

  protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed,
      Optional<byte[]> data) {
    Path commitFilePath = new Path(metaPath, completed.getFileName());
    try {
      // Open a new file and write the commit metadata in
      Path inflightCommitFile = new Path(metaPath, inflight.getFileName());
      createFileInMetaPath(inflight.getFileName(), data);
      boolean success = fs.rename(inflightCommitFile, commitFilePath);
      if (!success) {
        throw new HoodieIOException("Could not rename " + inflightCommitFile + " to " + commitFilePath);
      }
    } catch (IOException e) {
      throw new HoodieIOException("Could not complete " + inflight, e);
    }
  }

  protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
    Path inFlightCommitFilePath = new Path(metaPath, inflight.getFileName());
    try {
      if (!fs.exists(inFlightCommitFilePath)) {
        Path commitFilePath = new Path(metaPath, completed.getFileName());
        boolean success = fs.rename(commitFilePath, inFlightCommitFilePath);
        if (!success) {
          throw new HoodieIOException(
              "Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
        }
      }
    } catch (IOException e) {
      throw new HoodieIOException("Could not complete revert " + completed, e);
    }
  }

  protected void createFileInMetaPath(String filename, Optional<byte[]> content) {
    Path fullPath = new Path(metaPath, filename);
    try {
      if (!content.isPresent()) {
        // No content: just create an empty marker file
        if (fs.createNewFile(fullPath)) {
          log.info("Created a new file in meta path: " + fullPath);
          return;
        }
      } else {
        // Write the content, overwriting any existing file; try-with-resources closes the
        // stream even if the write fails
        try (FSDataOutputStream fsout = fs.create(fullPath, true)) {
          fsout.write(content.get());
        }
        return;
      }
      throw new HoodieIOException("Failed to create file " + fullPath);
    } catch (IOException e) {
      throw new HoodieIOException("Failed to create file " + fullPath, e);
    }
  }

  protected Optional<byte[]> readDataFromPath(Path detailPath) {
    try (FSDataInputStream is = fs.open(detailPath)) {
      return Optional.of(IOUtils.toByteArray(is));
    } catch (IOException e) {
      throw new HoodieIOException("Could not read commit details from " + detailPath, e);
    }
  }

  public HoodieActiveTimeline reload() {
    return new HoodieActiveTimeline(fs, metaPath);
  }
}
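
To see how the class above is typically driven, here is a minimal usage sketch. It is not taken from the Hoodie codebase: the meta path is made up, the commit metadata bytes are a placeholder, and the HoodieInstant(isInflight, action, timestamp) constructor is an assumption about the companion class in the same package. Everything else uses only methods shown in the listing.

import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.util.FSUtils;
import org.apache.hadoop.fs.FileSystem;

import java.util.Optional;

public class ActiveTimelineUsageExample {

  public static void main(String[] args) {
    FileSystem fs = FSUtils.getFs();
    String metaPath = "/data/sample_table/.hoodie";  // hypothetical table meta path
    HoodieActiveTimeline timeline = new HoodieActiveTimeline(fs, metaPath);

    // Start a new commit: generate a timestamp and create the in-flight marker file
    String commitTime = HoodieActiveTimeline.createNewCommitTime();
    HoodieInstant inflight =
        new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime);  // assumed constructor
    timeline.createInflight(inflight);

    // ... write the data files for this commit, then serialize the commit metadata ...
    byte[] commitMetadata = "{}".getBytes();  // placeholder metadata

    // Mark the commit complete, then reload; the timeline is not refreshed automatically
    timeline.saveAsComplete(inflight, Optional.of(commitMetadata));
    timeline = timeline.reload();
  }
}

The important detail is the final reload(): as the class javadoc states, mutations such as createInflight() and saveAsComplete() do not update the in-memory list of instants, so a fresh HoodieActiveTimeline must be constructed to observe them.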
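Reading the timeline back works through the view methods. The sketch below merges commit and clean actions with getTimelineOfActions() and reads the raw metadata bytes behind each completed instant with getInstantDetails(). It assumes that HoodieTimeline exposes a getInstants() method returning a Stream of HoodieInstant; that method is not part of the listing above, and the table path is again hypothetical.

import com.google.common.collect.Sets;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
import com.uber.hoodie.common.util.FSUtils;
import org.apache.hadoop.fs.FileSystem;

public class TimelineInspectionExample {

  public static void main(String[] args) {
    FileSystem fs = FSUtils.getFs();
    HoodieActiveTimeline timeline = new HoodieActiveTimeline(fs, "/data/sample_table/.hoodie");

    // Build a merged view of commit and clean actions
    HoodieTimeline commitsAndCleans = timeline.getTimelineOfActions(
        Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.CLEAN_ACTION));

    // Walk the completed instants and read the metadata file behind each one.
    // getInstants() returning a Stream<HoodieInstant> is assumed, not shown above.
    commitsAndCleans.getInstants()
        .filter(instant -> !instant.isInflight())
        .forEach(instant -> {
          byte[] details = timeline.getInstantDetails(instant).orElse(new byte[0]);
          System.out.println(instant.getFileName() + " -> " + details.length + " bytes");
        });
  }
}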