Java tutorial
/* * Copyright 2008-2010 LinkedIn, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package voldemort.store.routed; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.lang.mutable.MutableInt; import org.apache.log4j.Logger; import voldemort.VoldemortApplicationException; import voldemort.VoldemortException; import voldemort.cluster.Cluster; import voldemort.cluster.Node; import voldemort.cluster.failuredetector.FailureDetector; import voldemort.routing.RoutingStrategy; import voldemort.routing.RoutingStrategyFactory; import voldemort.store.InsufficientOperationalNodesException; import voldemort.store.NoSuchCapabilityException; import voldemort.store.Store; import voldemort.store.StoreCapabilityType; import voldemort.store.StoreDefinition; import voldemort.store.StoreUtils; import voldemort.store.UnreachableStoreException; import voldemort.utils.ByteArray; import voldemort.utils.ByteUtils; import voldemort.utils.SystemTime; import voldemort.utils.Time; import voldemort.utils.Utils; import voldemort.versioning.ObsoleteVersionException; import voldemort.versioning.VectorClock; import voldemort.versioning.Version; import voldemort.versioning.Versioned; import com.google.common.base.Function; import com.google.common.collect.Lists; import com.google.common.collect.Maps; /** * A Store which multiplexes requests to different internal Stores * * */ public class RoutedStore implements Store<ByteArray, byte[]> { private static final Logger logger = Logger.getLogger(RoutedStore.class.getName()); private final static StoreOp<Versioned<byte[]>> VERSIONED_OP = new StoreOp<Versioned<byte[]>>() { public List<Versioned<byte[]>> execute(Store<ByteArray, byte[]> store, ByteArray key) { return store.get(key); } }; private final static StoreOp<Version> VERSION_OP = new StoreOp<Version>() { public List<Version> execute(Store<ByteArray, byte[]> store, ByteArray key) { return store.getVersions(key); } }; private final String name; private final Map<Integer, Store<ByteArray, byte[]>> innerStores; private final ExecutorService executor; private final boolean repairReads; private final ReadRepairer<ByteArray, byte[]> readRepairer; private final long timeoutMs; private final Time time; private final StoreDefinition storeDef; private final FailureDetector failureDetector; private volatile RoutingStrategy routingStrategy; /** * Create a RoutedStoreClient * * @param name The name of the store * @param innerStores The mapping of node to client * @param routingStrategy The strategy for choosing a node given a key * @param requiredReads The minimum number of reads that must complete * before the operation will return * @param requiredWrites The minimum number of writes that must complete * before the operation will return * @param numberOfThreads The number of threads in the threadpool */ public RoutedStore(String name, Map<Integer, Store<ByteArray, byte[]>> innerStores, Cluster cluster, StoreDefinition storeDef, int numberOfThreads, boolean repairReads, long timeoutMs, FailureDetector failureDetector) { this(name, innerStores, cluster, storeDef, repairReads, Executors.newFixedThreadPool(numberOfThreads), timeoutMs, failureDetector, SystemTime.INSTANCE); } /** * Create a RoutedStoreClient * * @param name The name of the store * @param innerStores The mapping of node to client * @param routingStrategy The strategy for choosing a node given a key * @param requiredReads The minimum number of reads that must complete * before the operation will return * @param requiredWrites The minimum number of writes that must complete * before the operation will return * @param threadPool The threadpool to use */ public RoutedStore(String name, Map<Integer, Store<ByteArray, byte[]>> innerStores, Cluster cluster, StoreDefinition storeDef, boolean repairReads, ExecutorService threadPool, long timeoutMs, FailureDetector failureDetector, Time time) { if (storeDef.getRequiredReads() < 1) throw new IllegalArgumentException("Cannot have a storeDef.getRequiredReads() number less than 1."); if (storeDef.getRequiredWrites() < 1) throw new IllegalArgumentException("Cannot have a storeDef.getRequiredWrites() number less than 1."); if (storeDef.getPreferredReads() < storeDef.getRequiredReads()) throw new IllegalArgumentException( "storeDef.getPreferredReads() must be greater or equal to storeDef.getRequiredReads()."); if (storeDef.getPreferredWrites() < storeDef.getRequiredWrites()) throw new IllegalArgumentException( "storeDef.getPreferredWrites() must be greater or equal to storeDef.getRequiredWrites()."); if (storeDef.getPreferredReads() > innerStores.size()) throw new IllegalArgumentException( "storeDef.getPreferredReads() is larger than the total number of nodes!"); if (storeDef.getPreferredWrites() > innerStores.size()) throw new IllegalArgumentException( "storeDef.getPreferredWrites() is larger than the total number of nodes!"); this.name = name; this.innerStores = new ConcurrentHashMap<Integer, Store<ByteArray, byte[]>>(innerStores); this.repairReads = repairReads; this.executor = threadPool; this.readRepairer = new ReadRepairer<ByteArray, byte[]>(); this.timeoutMs = timeoutMs; this.time = Utils.notNull(time); this.storeDef = storeDef; this.failureDetector = failureDetector; this.routingStrategy = new RoutingStrategyFactory().updateRoutingStrategy(storeDef, cluster); } public void updateRoutingStrategy(RoutingStrategy routingStrategy) { logger.info("Updating routing strategy for RoutedStore:" + getName()); this.routingStrategy = routingStrategy; } public boolean delete(final ByteArray key, final Version version) throws VoldemortException { StoreUtils.assertValidKey(key); final List<Node> nodes = availableNodes(routingStrategy.routeRequest(key.get())); // quickly fail if there aren't enough live nodes to meet the // requirements final int numNodes = nodes.size(); if (numNodes < this.storeDef.getRequiredWrites()) throw new InsufficientOperationalNodesException("Only " + numNodes + " nodes in preference list, but " + this.storeDef.getRequiredWrites() + " writes required."); // A count of the number of successful operations final AtomicInteger successes = new AtomicInteger(0); final AtomicBoolean deletedSomething = new AtomicBoolean(false); // A list of thrown exceptions, indicating the number of failures final List<Exception> failures = Collections.synchronizedList(new LinkedList<Exception>()); // A semaphore indicating the number of completed operations // Once inititialized all permits are acquired, after that // permits are released when an operation is completed. // semaphore.acquire(n) waits for n operations to complete final Semaphore semaphore = new Semaphore(0, false); // Add the operations to the pool for (final Node node : nodes) { this.executor.execute(new Runnable() { public void run() { long startNs = System.nanoTime(); try { boolean deleted = innerStores.get(node.getId()).delete(key, version); successes.incrementAndGet(); deletedSomething.compareAndSet(false, deleted); recordSuccess(node, startNs); } catch (UnreachableStoreException e) { failures.add(e); recordException(node, startNs, e); } catch (VoldemortApplicationException e) { throw e; } catch (Exception e) { failures.add(e); logger.warn("Error in DELETE on node " + node.getId() + "(" + node.getHost() + ")", e); } finally { // signal that the operation is complete semaphore.release(); } } }); } int attempts = Math.min(storeDef.getPreferredWrites(), numNodes); if (this.storeDef.getPreferredWrites() <= 0) { return true; } else { for (int i = 0; i < numNodes; i++) { try { boolean acquired = semaphore.tryAcquire(timeoutMs, TimeUnit.MILLISECONDS); if (!acquired) logger.warn("Delete operation timed out waiting for operation " + i + " to complete after waiting " + timeoutMs + " ms."); // okay, at least the required number of operations have // completed, were they successful? if (successes.get() >= attempts) return deletedSomething.get(); } catch (InterruptedException e) { throw new InsufficientOperationalNodesException("Delete operation interrupted!", e); } } } // If we get to here, that means we couldn't hit the preferred number // of writes, throw an exception if you can't even hit the required // number if (successes.get() < storeDef.getRequiredWrites()) throw new InsufficientOperationalNodesException( this.storeDef.getRequiredWrites() + " deletes required, but " + successes.get() + " succeeded.", failures); else return deletedSomething.get(); } public Map<ByteArray, List<Versioned<byte[]>>> getAll(Iterable<ByteArray> keys) throws VoldemortException { StoreUtils.assertValidKeys(keys); Map<ByteArray, List<Versioned<byte[]>>> result = StoreUtils.newEmptyHashMap(keys); // Keys for each node needed to satisfy storeDef.getPreferredReads() if // no failures. Map<Node, List<ByteArray>> nodeToKeysMap = Maps.newHashMap(); // Keep track of nodes per key that might be needed if there are // failures during getAll Map<ByteArray, List<Node>> keyToExtraNodesMap = Maps.newHashMap(); for (ByteArray key : keys) { List<Node> availableNodes = availableNodes(routingStrategy.routeRequest(key.get())); // quickly fail if there aren't enough nodes to meet the requirement checkRequiredReads(availableNodes); int preferredReads = storeDef.getPreferredReads(); List<Node> preferredNodes = Lists.newArrayListWithCapacity(preferredReads); List<Node> extraNodes = Lists.newArrayListWithCapacity(3); for (Node node : availableNodes) { if (preferredNodes.size() < preferredReads) preferredNodes.add(node); else extraNodes.add(node); } for (Node node : preferredNodes) { List<ByteArray> nodeKeys = nodeToKeysMap.get(node); if (nodeKeys == null) { nodeKeys = Lists.newArrayList(); nodeToKeysMap.put(node, nodeKeys); } nodeKeys.add(key); } if (!extraNodes.isEmpty()) { List<Node> nodes = keyToExtraNodesMap.get(key); if (nodes == null) keyToExtraNodesMap.put(key, extraNodes); else nodes.addAll(extraNodes); } } List<Callable<GetAllResult>> callables = Lists.newArrayList(); for (Map.Entry<Node, List<ByteArray>> entry : nodeToKeysMap.entrySet()) { final Node node = entry.getKey(); final Collection<ByteArray> nodeKeys = entry.getValue(); if (failureDetector.isAvailable(node)) callables.add(new GetAllCallable(node, nodeKeys)); } // A list of thrown exceptions, indicating the number of failures List<Throwable> failures = Lists.newArrayList(); List<NodeValue<ByteArray, byte[]>> nodeValues = Lists.newArrayList(); Map<ByteArray, MutableInt> keyToSuccessCount = Maps.newHashMap(); for (ByteArray key : keys) keyToSuccessCount.put(key, new MutableInt(0)); List<Future<GetAllResult>> futures; try { // TODO What to do about timeouts? They should be longer as getAll // is likely to // take longer. At the moment, it's just timeoutMs * 3, but should // this be based on the number of the keys? futures = executor.invokeAll(callables, timeoutMs * 3, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { throw new InsufficientOperationalNodesException("getAll operation interrupted.", e); } for (Future<GetAllResult> f : futures) { if (f.isCancelled()) { logger.warn("Get operation timed out after " + timeoutMs + " ms."); continue; } try { GetAllResult getResult = f.get(); if (getResult.exception != null) { if (getResult.exception instanceof VoldemortApplicationException) { throw (VoldemortException) getResult.exception; } failures.add(getResult.exception); continue; } for (ByteArray key : getResult.callable.nodeKeys) { List<Versioned<byte[]>> retrieved = getResult.retrieved.get(key); MutableInt successCount = keyToSuccessCount.get(key); successCount.increment(); /* * retrieved can be null if there are no values for the key * provided */ if (retrieved != null) { List<Versioned<byte[]>> existing = result.get(key); if (existing == null) result.put(key, Lists.newArrayList(retrieved)); else existing.addAll(retrieved); } } nodeValues.addAll(getResult.nodeValues); } catch (InterruptedException e) { throw new InsufficientOperationalNodesException("getAll operation interrupted.", e); } catch (ExecutionException e) { // We catch all Throwables apart from Error in the callable, so // the else part // should never happen if (e.getCause() instanceof Error) throw (Error) e.getCause(); else logger.error(e.getMessage(), e); } } for (ByteArray key : keys) { MutableInt successCountWrapper = keyToSuccessCount.get(key); int successCount = successCountWrapper.intValue(); if (successCount < storeDef.getPreferredReads()) { List<Node> extraNodes = keyToExtraNodesMap.get(key); if (extraNodes != null) { for (Node node : extraNodes) { long startNs = System.nanoTime(); try { List<Versioned<byte[]>> values = innerStores.get(node.getId()).get(key); fillRepairReadsValues(nodeValues, key, node, values); List<Versioned<byte[]>> versioneds = result.get(key); if (versioneds == null) result.put(key, Lists.newArrayList(values)); else versioneds.addAll(values); recordSuccess(node, startNs); if (++successCount >= storeDef.getPreferredReads()) break; } catch (UnreachableStoreException e) { failures.add(e); recordException(node, startNs, e); } catch (VoldemortApplicationException e) { throw e; } catch (Exception e) { logger.warn("Error in GET_ALL on node " + node.getId() + "(" + node.getHost() + ")", e); failures.add(e); } } } } successCountWrapper.setValue(successCount); } repairReads(nodeValues); for (Map.Entry<ByteArray, MutableInt> mapEntry : keyToSuccessCount.entrySet()) { int successCount = mapEntry.getValue().intValue(); if (successCount < storeDef.getRequiredReads()) throw new InsufficientOperationalNodesException( this.storeDef.getRequiredReads() + " reads required, but " + successCount + " succeeded.", failures); } return result; } public List<Versioned<byte[]>> get(ByteArray key) { Function<List<GetResult<Versioned<byte[]>>>, Void> readRepairFunction = new Function<List<GetResult<Versioned<byte[]>>>, Void>() { public Void apply(List<GetResult<Versioned<byte[]>>> nodeResults) { List<NodeValue<ByteArray, byte[]>> nodeValues = Lists .newArrayListWithExpectedSize(nodeResults.size()); for (GetResult<Versioned<byte[]>> getResult : nodeResults) fillRepairReadsValues(nodeValues, getResult.key, getResult.node, getResult.retrieved); repairReads(nodeValues); return null; } }; return get(key, VERSIONED_OP, readRepairFunction); } /* * 1. Attempt preferredReads, and then wait for these to complete 2. If we * got all the reads we wanted, then we are done. 3. If not then continue * serially attempting to read from each node until we get preferredReads or * run out of nodes. 4. If we have multiple results do a read repair 5. If * we have at least requiredReads return. Otherwise throw an exception. */ private <R> List<R> get(final ByteArray key, StoreOp<R> fetcher, Function<List<GetResult<R>>, Void> preReturnProcedure) throws VoldemortException { StoreUtils.assertValidKey(key); final List<Node> nodes = availableNodes(routingStrategy.routeRequest(key.get())); // quickly fail if there aren't enough nodes to meet the requirement checkRequiredReads(nodes); final List<GetResult<R>> retrieved = Lists.newArrayList(); // A count of the number of successful operations int successes = 0; // A list of thrown exceptions, indicating the number of failures final List<Throwable> failures = Lists.newArrayListWithCapacity(3); // Do the preferred number of reads in parallel int attempts = Math.min(this.storeDef.getPreferredReads(), nodes.size()); int nodeIndex = 0; List<Callable<GetResult<R>>> callables = Lists.newArrayListWithCapacity(attempts); for (; nodeIndex < attempts; nodeIndex++) { final Node node = nodes.get(nodeIndex); callables.add(new GetCallable<R>(node, key, fetcher)); } List<Future<GetResult<R>>> futures; try { futures = executor.invokeAll(callables, timeoutMs, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { throw new InsufficientOperationalNodesException("Get operation interrupted!", e); } for (Future<GetResult<R>> f : futures) { if (f.isCancelled()) { logger.warn("Get operation timed out after " + timeoutMs + " ms."); continue; } try { GetResult<R> getResult = f.get(); if (getResult.exception != null) { if (getResult.exception instanceof VoldemortApplicationException) { throw (VoldemortException) getResult.exception; } failures.add(getResult.exception); continue; } ++successes; retrieved.add(getResult); } catch (InterruptedException e) { throw new InsufficientOperationalNodesException("Get operation interrupted!", e); } catch (ExecutionException e) { // We catch all Throwable subclasses apart from Error in the // callable, so the else // part should never happen. if (e.getCause() instanceof Error) throw (Error) e.getCause(); else logger.error(e.getMessage(), e); } } // Now if we had any failures we will be short a few reads. Do serial // reads to make up for these. while (successes < this.storeDef.getPreferredReads() && nodeIndex < nodes.size()) { Node node = nodes.get(nodeIndex); long startNs = System.nanoTime(); try { retrieved.add( new GetResult<R>(node, key, fetcher.execute(innerStores.get(node.getId()), key), null)); ++successes; recordSuccess(node, startNs); } catch (UnreachableStoreException e) { failures.add(e); recordException(node, startNs, e); } catch (VoldemortApplicationException e) { throw e; } catch (Exception e) { logger.warn("Error in GET on node " + node.getId() + "(" + node.getHost() + ")", e); failures.add(e); } nodeIndex++; } if (logger.isTraceEnabled()) logger.trace("GET retrieved the following node values: " + formatNodeValues(retrieved)); if (preReturnProcedure != null) preReturnProcedure.apply(retrieved); if (successes >= this.storeDef.getRequiredReads()) { List<R> result = Lists.newArrayListWithExpectedSize(retrieved.size()); for (GetResult<R> getResult : retrieved) result.addAll(getResult.retrieved); if (logger.isTraceEnabled()) logger.trace("return " + result.size() + "items from routedstore"); return result; } else throw new InsufficientOperationalNodesException( this.storeDef.getRequiredReads() + " reads required, but " + successes + " succeeded.", failures); } private void fillRepairReadsValues(final List<NodeValue<ByteArray, byte[]>> nodeValues, final ByteArray key, Node node, List<Versioned<byte[]>> fetched) { if (repairReads) { if (fetched.size() == 0) nodeValues.add(nullValue(node, key)); else { for (Versioned<byte[]> f : fetched) nodeValues.add(new NodeValue<ByteArray, byte[]>(node.getId(), key, f)); } } } private NodeValue<ByteArray, byte[]> nullValue(Node node, ByteArray key) { return new NodeValue<ByteArray, byte[]>(node.getId(), key, new Versioned<byte[]>(null)); } private void repairReads(List<NodeValue<ByteArray, byte[]>> nodeValues) { if (!repairReads || nodeValues.size() <= 1 || storeDef.getPreferredReads() <= 1) return; final List<NodeValue<ByteArray, byte[]>> toReadRepair = Lists.newArrayList(); /* * We clone after computing read repairs in the assumption that the * output will be smaller than the input. Note that we clone the * version, but not the key or value as the latter two are not mutated. */ for (NodeValue<ByteArray, byte[]> v : readRepairer.getRepairs(nodeValues)) { Versioned<byte[]> versioned = Versioned.value(v.getVersioned().getValue(), ((VectorClock) v.getVersion()).clone()); toReadRepair.add(new NodeValue<ByteArray, byte[]>(v.getNodeId(), v.getKey(), versioned)); } this.executor.execute(new Runnable() { public void run() { for (NodeValue<ByteArray, byte[]> v : toReadRepair) { try { if (logger.isDebugEnabled()) logger.debug("Doing read repair on node " + v.getNodeId() + " for key '" + v.getKey() + "' with version " + v.getVersion() + "."); innerStores.get(v.getNodeId()).put(v.getKey(), v.getVersioned()); } catch (VoldemortApplicationException e) { if (logger.isDebugEnabled()) logger.debug("Read repair cancelled due to application level exception on node " + v.getNodeId() + " for key '" + v.getKey() + "' with version " + v.getVersion() + ": " + e.getMessage()); } catch (Exception e) { logger.debug("Read repair failed: ", e); } } } }); } private void checkRequiredReads(final List<Node> nodes) throws InsufficientOperationalNodesException { if (nodes.size() < this.storeDef.getRequiredReads()) throw new InsufficientOperationalNodesException("Only " + nodes.size() + " nodes in preference list, but " + this.storeDef.getRequiredReads() + " reads required."); } private <R> String formatNodeValues(List<GetResult<R>> results) { // log all retrieved values StringBuilder builder = new StringBuilder(); builder.append("{"); for (GetResult<?> r : results) { builder.append( "GetResult(nodeId=" + r.node.getId() + ", key=" + r.key + ", retrieved= " + r.retrieved + ")"); builder.append(", "); } builder.append("}"); return builder.toString(); } public String getName() { return this.name; } public void put(final ByteArray key, final Versioned<byte[]> versioned) throws VoldemortException { long startNs = System.nanoTime(); StoreUtils.assertValidKey(key); final List<Node> nodes = availableNodes(routingStrategy.routeRequest(key.get())); // quickly fail if there aren't enough nodes to meet the requirement final int numNodes = nodes.size(); if (numNodes < this.storeDef.getRequiredWrites()) throw new InsufficientOperationalNodesException("Only " + numNodes + " nodes in preference list, but " + this.storeDef.getRequiredWrites() + " writes required."); // A count of the number of successful operations final AtomicInteger successes = new AtomicInteger(0); // A list of thrown exceptions, indicating the number of failures final List<Exception> failures = Collections.synchronizedList(new ArrayList<Exception>(1)); // If requiredWrites > 0 then do a single blocking write to the first // live node in the preference list if this node throws an // ObsoleteVersionException allow it to propagate Node master = null; int currentNode = 0; Versioned<byte[]> versionedCopy = null; for (; currentNode < numNodes; currentNode++) { Node current = nodes.get(currentNode); long startNsLocal = System.nanoTime(); try { versionedCopy = incremented(versioned, current.getId()); innerStores.get(current.getId()).put(key, versionedCopy); successes.getAndIncrement(); recordSuccess(current, startNsLocal); master = current; break; } catch (UnreachableStoreException e) { recordException(current, startNsLocal, e); failures.add(e); } catch (VoldemortApplicationException e) { throw e; } catch (Exception e) { failures.add(e); } } if (successes.get() < 1) throw new InsufficientOperationalNodesException("No master node succeeded!", failures.size() > 0 ? failures.get(0) : null); else currentNode++; // A semaphore indicating the number of completed operations // Once inititialized all permits are acquired, after that // permits are released when an operation is completed. // semaphore.acquire(n) waits for n operations to complete final Versioned<byte[]> finalVersionedCopy = versionedCopy; final Semaphore semaphore = new Semaphore(0, false); // Add the operations to the pool int attempts = 0; for (; currentNode < numNodes; currentNode++) { attempts++; final Node node = nodes.get(currentNode); this.executor.execute(new Runnable() { public void run() { long startNsLocal = System.nanoTime(); try { innerStores.get(node.getId()).put(key, finalVersionedCopy); successes.incrementAndGet(); recordSuccess(node, startNsLocal); } catch (UnreachableStoreException e) { recordException(node, startNsLocal, e); failures.add(e); } catch (ObsoleteVersionException e) { // ignore this completely here // this means that a higher version was able // to write on this node and should be termed as clean // success. } catch (VoldemortApplicationException e) { throw e; } catch (Exception e) { logger.warn("Error in PUT on node " + node.getId() + "(" + node.getHost() + ")", e); failures.add(e); } finally { // signal that the operation is complete semaphore.release(); } } }); } // Block until we get enough completions int blockCount = Math.min(storeDef.getPreferredWrites() - 1, attempts); boolean noTimeout = blockOnPut(startNs, semaphore, 0, blockCount, successes, storeDef.getPreferredWrites()); if (successes.get() < storeDef.getRequiredWrites()) { /* * We don't have enough required writes, but we haven't timed out * yet, so block a little more if there are healthy nodes that can * help us achieve our target. */ if (noTimeout) { int startingIndex = blockCount - 1; blockCount = Math.max(storeDef.getPreferredWrites() - 1, attempts); blockOnPut(startNs, semaphore, startingIndex, blockCount, successes, storeDef.getRequiredWrites()); } if (successes.get() < storeDef.getRequiredWrites()) throw new InsufficientOperationalNodesException(successes.get() + " writes succeeded, but " + this.storeDef.getRequiredWrites() + " are required.", failures); } // Okay looks like it worked, increment the version for the caller VectorClock versionedClock = (VectorClock) versioned.getVersion(); versionedClock.incrementVersion(master.getId(), time.getMilliseconds()); } /** * @return false if the operation timed out, true otherwise. */ private boolean blockOnPut(long startNs, Semaphore semaphore, int startingIndex, int blockCount, AtomicInteger successes, int successesRequired) { for (int i = startingIndex; i < blockCount; i++) { try { long ellapsedNs = System.nanoTime() - startNs; long remainingNs = (timeoutMs * Time.NS_PER_MS) - ellapsedNs; boolean acquiredPermit = semaphore.tryAcquire(Math.max(remainingNs, 0), TimeUnit.NANOSECONDS); if (!acquiredPermit) { logger.warn("Timed out waiting for put # " + (i + 1) + " of " + blockCount + " to succeed."); return false; } if (successes.get() >= successesRequired) break; } catch (InterruptedException e) { throw new InsufficientOperationalNodesException("Put operation interrupted", e); } } return true; } private Versioned<byte[]> incremented(Versioned<byte[]> versioned, int nodeId) { return new Versioned<byte[]>(versioned.getValue(), ((VectorClock) versioned.getVersion()).incremented(nodeId, time.getMilliseconds())); } private List<Node> availableNodes(List<Node> list) { List<Node> available = new ArrayList<Node>(list.size()); for (Node node : list) if (failureDetector.isAvailable(node)) available.add(node); return available; } public void close() { this.executor.shutdown(); try { if (!this.executor.awaitTermination(10, TimeUnit.SECONDS)) this.executor.shutdownNow(); } catch (InterruptedException e) { // okay, fine, playing nice didn't work this.executor.shutdownNow(); } VoldemortException exception = null; for (Store<?, ?> client : innerStores.values()) { try { client.close(); } catch (VoldemortException v) { exception = v; } } if (exception != null) throw exception; } public Map<Integer, Store<ByteArray, byte[]>> getInnerStores() { return this.innerStores; } public Object getCapability(StoreCapabilityType capability) { switch (capability) { case ROUTING_STRATEGY: return this.routingStrategy; case READ_REPAIRER: return this.readRepairer; case VERSION_INCREMENTING: return true; default: throw new NoSuchCapabilityException(capability, getName()); } } public List<Version> getVersions(ByteArray key) { return get(key, VERSION_OP, null); } private void recordException(Node node, long startNs, UnreachableStoreException e) { failureDetector.recordException(node, (System.nanoTime() - startNs) / Time.NS_PER_MS, e); } private void recordSuccess(Node node, long startNs) { failureDetector.recordSuccess(node, (System.nanoTime() - startNs) / Time.NS_PER_MS); } private final class GetCallable<R> implements Callable<GetResult<R>> { private final Node node; private final ByteArray key; private final StoreOp<R> fetcher; public GetCallable(Node node, ByteArray key, StoreOp<R> fetcher) { this.node = node; this.key = key; this.fetcher = fetcher; } public GetResult<R> call() throws Exception { List<R> fetched = Collections.emptyList(); Throwable exception = null; long startNs = System.nanoTime(); try { if (logger.isTraceEnabled()) logger.trace("Attempting get operation on node " + node.getId() + " for key '" + ByteUtils.toHexString(key.get()) + "'."); fetched = fetcher.execute(innerStores.get(node.getId()), key); recordSuccess(node, startNs); } catch (UnreachableStoreException e) { exception = e; recordException(node, startNs, e); } catch (Throwable e) { if (e instanceof Error) throw (Error) e; logger.warn("Error in GET on node " + node.getId() + "(" + node.getHost() + ")", e); exception = e; } return new GetResult<R>(node, key, fetched, exception); } } private final static class GetResult<R> { final Node node; final ByteArray key; final List<R> retrieved; final Throwable exception; public GetResult(Node node, ByteArray key, List<R> retrieved, Throwable exception) { this.node = node; this.key = key; this.retrieved = retrieved; this.exception = exception; } } private final class GetAllCallable implements Callable<GetAllResult> { private final Node node; private final Collection<ByteArray> nodeKeys; private GetAllCallable(Node node, Collection<ByteArray> nodeKeys) { this.node = node; this.nodeKeys = nodeKeys; } public GetAllResult call() { Map<ByteArray, List<Versioned<byte[]>>> retrieved = Collections.emptyMap(); Throwable exception = null; List<NodeValue<ByteArray, byte[]>> nodeValues = Lists.newArrayList(); long startNs = System.nanoTime(); try { retrieved = innerStores.get(node.getId()).getAll(nodeKeys); if (repairReads) { for (Map.Entry<ByteArray, List<Versioned<byte[]>>> entry : retrieved.entrySet()) fillRepairReadsValues(nodeValues, entry.getKey(), node, entry.getValue()); for (ByteArray nodeKey : nodeKeys) { if (!retrieved.containsKey(nodeKey)) fillRepairReadsValues(nodeValues, nodeKey, node, Collections.<Versioned<byte[]>>emptyList()); } } recordSuccess(node, startNs); } catch (UnreachableStoreException e) { exception = e; recordException(node, startNs, e); } catch (Throwable e) { if (e instanceof Error) throw (Error) e; exception = e; logger.warn("Error in GET on node " + node.getId() + "(" + node.getHost() + ")", e); } return new GetAllResult(this, retrieved, nodeValues, exception); } } private static class GetAllResult { final GetAllCallable callable; final Map<ByteArray, List<Versioned<byte[]>>> retrieved; /* Note that this can never be an Error subclass */ final Throwable exception; final List<NodeValue<ByteArray, byte[]>> nodeValues; private GetAllResult(GetAllCallable callable, Map<ByteArray, List<Versioned<byte[]>>> retrieved, List<NodeValue<ByteArray, byte[]>> nodeValues, Throwable exception) { this.callable = callable; this.exception = exception; this.retrieved = retrieved; this.nodeValues = nodeValues; } } private interface StoreOp<R> { List<R> execute(Store<ByteArray, byte[]> store, ByteArray key); } }