// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional information regarding * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. You may obtain a * copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package org.apache.fluo.recipes.core.map; import java.io.Serializable; import java.util.Collections; import java.util.Iterator; import java.util.Map; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterators; import com.google.common.hash.Hashing; import org.apache.fluo.api.client.SnapshotBase; import org.apache.fluo.api.client.TransactionBase; import org.apache.fluo.api.config.FluoConfiguration; import org.apache.fluo.api.config.SimpleConfiguration; import org.apache.fluo.api.data.Bytes; import org.apache.fluo.api.data.Bytes.BytesBuilder; import org.apache.fluo.api.data.Column; import org.apache.fluo.api.data.RowColumnValue; import org.apache.fluo.api.data.Span; import org.apache.fluo.api.observer.Observer; import org.apache.fluo.api.observer.Observer.NotificationType; import org.apache.fluo.api.observer.ObserverProvider; import org.apache.fluo.api.observer.StringObserver; import org.apache.fluo.recipes.core.combine.CombineQueue; import org.apache.fluo.recipes.core.common.TableOptimizations; import org.apache.fluo.recipes.core.common.TableOptimizations.TableOptimizationsFactory; import 
org.apache.fluo.recipes.core.serialization.SimpleSerializer; /** * See the project level documentation for information about this recipe. * * @since 1.0.0 * @deprecated since 1.1.0 use {@link CombineQueue} */ @Deprecated public class CollisionFreeMap<K, V> { private Bytes updatePrefix; private Bytes dataPrefix; private Class<V> valType; private SimpleSerializer serializer; private Combiner<K, V> combiner; UpdateObserver<K, V> updateObserver; static final Column UPDATE_COL = new Column("u", "v"); static final Column NEXT_COL = new Column("u", "next"); private int numBuckets = -1; private CombineQueue<K, V> combineQ; private Observer combineQueueObserver; private static class CfmRegistry implements ObserverProvider.Registry { Observer observer; private class Registry implements ObserverProvider.Registry.ObserverArgument, ObserverProvider.Registry.IdentityOption { @Override public ObserverArgument withId(String alias) { return this; } @Override public void useObserver(Observer obs) { observer = obs; } @Override public void useStrObserver(StringObserver obs) { observer = obs; } } @Override public IdentityOption forColumn(Column observedColumn, NotificationType ntfyType) { return new Registry(); } } @SuppressWarnings("unchecked") CollisionFreeMap(SimpleConfiguration appConfig, Options opts, SimpleSerializer serializer) throws Exception { this.updatePrefix = Bytes.of(opts.mapId + ":u:"); this.dataPrefix = Bytes.of(opts.mapId + ":d:"); this.numBuckets = opts.numBuckets; this.valType = (Class<V>) getClass().getClassLoader().loadClass(opts.valueType); this.combiner = (Combiner<K, V>) getClass().getClassLoader().loadClass(opts.combinerType).newInstance(); this.serializer = serializer; if (opts.updateObserverType != null) { this.updateObserver = getClass().getClassLoader().loadClass(opts.updateObserverType) .asSubclass(UpdateObserver.class).newInstance(); } else { this.updateObserver = new NullUpdateObserver<>(); } combineQ = CombineQueue.getInstance(opts.mapId, appConfig); 
// When this class was deprecated, most of its code was copied to CombineQueue. The following // code is a round about way of using that copied code, with having to make anything in // CombineQueue public. CfmRegistry obsRegistry = new CfmRegistry(); combineQ.registerObserver(obsRegistry, i -> this.combiner.combine(i.getKey(), i.iterator()), (tx, changes) -> this.updateObserver.updatingValues(tx, Update.transform(changes))); combineQueueObserver = obsRegistry.observer; } private V deserVal(Bytes val) { return serializer.deserialize(val.toArray(), valType); } void process(TransactionBase tx, Bytes ntfyRow, Column col) throws Exception { combineQueueObserver.process(tx, ntfyRow, col); } private static final Column DATA_COLUMN = new Column("data", "current"); private Iterator<V> concat(Iterator<V> updates, Bytes currentVal) { if (currentVal == null) { return updates; } return Iterators.concat(updates, Iterators.singletonIterator(deserVal(currentVal))); } /** * This method will retrieve the current value for key and any outstanding updates and combine * them using the configured {@link Combiner}. The result from the combiner is returned. 
*/ public V get(SnapshotBase tx, K key) { byte[] k = serializer.serialize(key); int hash = Hashing.murmur3_32().hashBytes(k).asInt(); String bucketId = genBucketId(Math.abs(hash % numBuckets), numBuckets); BytesBuilder rowBuilder = Bytes.builder(); rowBuilder.append(updatePrefix).append(bucketId).append(':').append(k); Iterator<RowColumnValue> iter = tx.scanner().over(Span.prefix(rowBuilder.toBytes())).build().iterator(); Iterator<V> ui; if (iter.hasNext()) { ui = Iterators.transform(iter, rcv -> deserVal(rcv.getValue())); } else { ui = Collections.<V>emptyList().iterator(); } rowBuilder.setLength(0); rowBuilder.append(dataPrefix).append(bucketId).append(':').append(k); Bytes dataRow = rowBuilder.toBytes(); Bytes cv = tx.get(dataRow, DATA_COLUMN); if (!ui.hasNext()) { if (cv == null) { return null; } else { return deserVal(cv); } } return combiner.combine(key, concat(ui, cv)).orElse(null); } /** * Queues updates for a collision free map. These updates will be made by an Observer executing * another transaction. This method will not collide with other transaction queuing updates for * the same keys. * * @param tx This transaction will be used to make the updates. * @param updates The keys in the map should correspond to keys in the collision free map being * updated. The values in the map will be queued for updating. */ public void update(TransactionBase tx, Map<K, V> updates) { combineQ.addAll(tx, updates); } static String genBucketId(int bucket, int maxBucket) { Preconditions.checkArgument(bucket >= 0); Preconditions.checkArgument(maxBucket > 0); int bits = 32 - Integer.numberOfLeadingZeros(maxBucket); int bucketLen = bits / 4 + (bits % 4 > 0 ? 
1 : 0); return Strings.padStart(Integer.toHexString(bucket), bucketLen, '0'); } public static <K2, V2> CollisionFreeMap<K2, V2> getInstance(String mapId, SimpleConfiguration appConf) { Options opts = new Options(mapId, appConf); try { return new CollisionFreeMap<>(appConf, opts, SimpleSerializer.getInstance(appConf)); } catch (Exception e) { // TODO throw new RuntimeException(e); } } /** * A {@link CollisionFreeMap} stores data in its own data format in the Fluo table. When * initializing a Fluo table with something like Map Reduce or Spark, data will need to be written * in this format. That's the purpose of this method, it provide a simple class that can do this * conversion. */ public static <K2, V2> Initializer<K2, V2> getInitializer(String mapId, int numBuckets, SimpleSerializer serializer) { return new Initializer<>(mapId, numBuckets, serializer); } /** * @see CollisionFreeMap#getInitializer(String, int, SimpleSerializer) * * @since 1.0.0 * @deprecated since 1.1.0 */ @Deprecated public static class Initializer<K2, V2> implements Serializable { private static final long serialVersionUID = 1L; private org.apache.fluo.recipes.core.combine.CombineQueue.Initializer<K2, V2> initializer; private Initializer(String mapId, int numBuckets, SimpleSerializer serializer) { this.initializer = CombineQueue.getInitializer(mapId, numBuckets, serializer); } public RowColumnValue convert(K2 key, V2 val) { return initializer.convert(key, val); } } /** * @since 1.0.0 * @deprecated since 1.1.0 */ @Deprecated public static class Options { static final long DEFAULT_BUFFER_SIZE = 1 << 22; static final int DEFAULT_BUCKETS_PER_TABLET = 10; int numBuckets; Integer bucketsPerTablet = null; Long bufferSize; String keyType; String valueType; String combinerType; String updateObserverType; String mapId; private static final String PREFIX = "recipes.cfm."; Options(String mapId, SimpleConfiguration appConfig) { this.mapId = mapId; this.numBuckets = appConfig.getInt(PREFIX + mapId + 
".buckets"); this.combinerType = appConfig.getString(PREFIX + mapId + ".combiner"); this.keyType = appConfig.getString(PREFIX + mapId + ".key"); this.valueType = appConfig.getString(PREFIX + mapId + ".val"); this.updateObserverType = appConfig.getString(PREFIX + mapId + ".updateObserver", null); this.bufferSize = appConfig.getLong(PREFIX + mapId + ".bufferSize", DEFAULT_BUFFER_SIZE); this.bucketsPerTablet = appConfig.getInt(PREFIX + mapId + ".bucketsPerTablet", DEFAULT_BUCKETS_PER_TABLET); } public Options(String mapId, String combinerType, String keyType, String valType, int buckets) { Preconditions.checkArgument(buckets > 0); Preconditions.checkArgument(!mapId.contains(":"), "Map id cannot contain ':'"); this.mapId = mapId; this.numBuckets = buckets; this.combinerType = combinerType; this.updateObserverType = null; this.keyType = keyType; this.valueType = valType; } public Options(String mapId, String combinerType, String updateObserverType, String keyType, String valueType, int buckets) { Preconditions.checkArgument(buckets > 0); Preconditions.checkArgument(!mapId.contains(":"), "Map id cannot contain ':'"); this.mapId = mapId; this.numBuckets = buckets; this.combinerType = combinerType; this.updateObserverType = updateObserverType; this.keyType = keyType; this.valueType = valueType; } /** * Sets a limit on the amount of serialized updates to read into memory. Additional memory will * be used to actually deserialize and process the updates. This limit does not account for * object overhead in java, which can be significant. * * <p> * The way memory read is calculated is by summing the length of serialized key and value byte * arrays. Once this sum exceeds the configured memory limit, no more update key values are * processed in the current transaction. 
When not everything is processed, the observer * processing updates will notify itself causing another transaction to continue processing * later */ public Options setBufferSize(long bufferSize) { Preconditions.checkArgument(bufferSize > 0, "Buffer size must be positive"); this.bufferSize = bufferSize; return this; } long getBufferSize() { if (bufferSize == null) { return DEFAULT_BUFFER_SIZE; } return bufferSize; } /** * Sets the number of buckets per tablet to generate. This affects how many split points will be * generated when optimizing the Accumulo table. */ public Options setBucketsPerTablet(int bucketsPerTablet) { Preconditions.checkArgument(bucketsPerTablet > 0, "bucketsPerTablet is <= 0 : " + bucketsPerTablet); this.bucketsPerTablet = bucketsPerTablet; return this; } public <K, V> Options(String mapId, Class<? extends Combiner<K, V>> combiner, Class<K> keyType, Class<V> valueType, int buckets) { this(mapId, combiner.getName(), keyType.getName(), valueType.getName(), buckets); } public <K, V> Options(String mapId, Class<? extends Combiner<K, V>> combiner, Class<? extends UpdateObserver<K, V>> updateObserver, Class<K> keyType, Class<V> valueType, int buckets) { this(mapId, combiner.getName(), updateObserver.getName(), keyType.getName(), valueType.getName(), buckets); } void save(SimpleConfiguration appConfig) { appConfig.setProperty(PREFIX + mapId + ".combiner", combinerType + ""); if (updateObserverType != null) { appConfig.setProperty(PREFIX + mapId + ".updateObserver", updateObserverType + ""); } } } /** * This method configures a collision free map for use. It must be called before initializing * Fluo. 
*/ public static void configure(FluoConfiguration fluoConfig, Options opts) { org.apache.fluo.recipes.core.combine.CombineQueue.FluentOptions cqopts = CombineQueue.configure(opts.mapId) .keyType(opts.keyType).valueType(opts.valueType).buckets(opts.numBuckets); if (opts.bucketsPerTablet != null) { cqopts.bucketsPerTablet(opts.bucketsPerTablet); } if (opts.bufferSize != null) { cqopts.bufferSize(opts.bufferSize); } cqopts.save(fluoConfig); opts.save(fluoConfig.getAppConfiguration()); fluoConfig.addObserver(new org.apache.fluo.api.config.ObserverSpecification( CollisionFreeMapObserver.class.getName(), ImmutableMap.of("mapId", opts.mapId))); } /** * @deprecated since 1.1.0 use {@link org.apache.fluo.recipes.core.combine.CombineQueue.Optimizer} */ @Deprecated public static class Optimizer implements TableOptimizationsFactory { /** * Return suggested Fluo table optimizations for the specified collision free map. * * @param appConfig Must pass in the application configuration obtained from * {@code FluoClient.getAppConfiguration()} or * {@code FluoConfiguration.getAppConfiguration()} */ @Override public TableOptimizations getTableOptimizations(String mapId, SimpleConfiguration appConfig) { return new org.apache.fluo.recipes.core.combine.CombineQueue.Optimizer().getTableOptimizations(mapId, appConfig); } } }