List of usage examples for com.google.common.collect Iterables skip
public static <T> Iterable<T> skip(final Iterable<T> iterable, final int numberToSkip)
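Before the project-specific examples, here is a minimal, self-contained sketch of the method's basic behavior; the list contents below are invented for illustration, not taken from any of the projects listed here.

import com.google.common.collect.Iterables;
import java.util.Arrays;
import java.util.List;

public class SkipDemo {
    public static void main(String[] args) {
        List<String> names = Arrays.asList("header", "alice", "bob", "carol");
        // Skips the first element and iterates over the rest: alice, bob, carol
        for (String name : Iterables.skip(names, 1)) {
            System.out.println(name);
        }
        // Skipping more elements than the iterable contains yields an empty view.
        System.out.println(Iterables.isEmpty(Iterables.skip(names, 10))); // true
    }
}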
From source file:org.calrissian.mango.collect.CloseableIterables.java
/**
 * Returns a view of {@code iterable} that skips its first
 * {@code numberToSkip} elements. If {@code iterable} contains fewer than
 * {@code numberToSkip} elements, the returned closeable iterable skips all of its
 * elements.
 *
 * <p>Modifications to the underlying {@link CloseableIterable} before a call to
 * {@code iterator()} are reflected in the returned iterator. That is, the
 * iterator skips the first {@code numberToSkip} elements that exist when the
 * {@code Iterator} is created, not when {@code skip()} is called.
 *
 * <p>The returned closeable iterable's iterator supports {@code remove()} if the
 * iterator of the underlying iterable supports it. Note that it is
 * <i>not</i> possible to delete the last skipped element by immediately
 * calling {@code remove()} on that iterator, as the {@code Iterator}
 * contract states that a call to {@code remove()} before a call to
 * {@code next()} will throw an {@link IllegalStateException}.
 */
public static <T> CloseableIterable<T> skip(final CloseableIterable<T> iterable, final int numberToSkip) {
    return wrap(Iterables.skip(iterable, numberToSkip), iterable);
}
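The view semantics documented above come from the underlying Guava call. A small sketch (list contents are made up) showing that the skipped prefix is determined when the iterator is created, not when skip() is called:

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import java.util.List;

public class SkipViewDemo {
    public static void main(String[] args) {
        List<Integer> numbers = Lists.newArrayList(1, 2, 3);
        Iterable<Integer> tail = Iterables.skip(numbers, 2);
        // Modifying the list before iterating is reflected in the view:
        numbers.add(0, 0);                             // list is now [0, 1, 2, 3]
        System.out.println(Lists.newArrayList(tail));  // prints [2, 3]
    }
}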
From source file:com.wrmsr.wava.yen.parser.ModuleFactory.java
public YModule create() {
    functionCounter = 0;
    for (Element e : Iterables.skip(root, 1)) {
        ListElement le = (ListElement) e;
        preParseFunctionType(le);
        preParseImports(le);
    }
    functionCounter = 0;
    for (Element e : Iterables.skip(root, 1)) {
        ListElement le = (ListElement) e;
        parseModuleElement(le);
    }
    return builder.build();
}
From source file:com.google.enterprise.connector.util.diffing.SnapshotStore.java
@VisibleForTesting
public void deleteOldSnapshots() {
    // Leave at least two snapshot files, even if oldestSnapshotToKeep
    // is too high.
    for (long k : Iterables.skip(getExistingSnapshots(), 2)) {
        if (k < oldestSnapshotToKeep) {
            File x = getSnapshotFile(snapshotDir, k);
            if (x.delete()) {
                LOG.fine("deleting snapshot file " + x.getAbsolutePath());
            } else {
                LOG.warning("failed to delete snapshot file " + x.getAbsolutePath());
            }
        }
    }
}
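In isolation, the retention idiom above ("keep the first two, consider the rest for deletion") looks like the following sketch; the snapshot names and the newest-first ordering are invented for illustration.

import com.google.common.collect.Iterables;
import java.util.Arrays;
import java.util.List;

public class RetentionDemo {
    public static void main(String[] args) {
        // Assuming snapshots are ordered newest first: keep the two newest,
        // treat everything after them as a deletion candidate.
        List<String> snapshots = Arrays.asList("snap.5", "snap.4", "snap.3", "snap.2", "snap.1");
        for (String candidate : Iterables.skip(snapshots, 2)) {
            System.out.println("would delete " + candidate);
        }
    }
}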
From source file:msi.gaml.factories.ModelAssembler.java
public ModelDescription assemble(final String projectPath, final String modelPath,
        final Iterable<ISyntacticElement> allModels, final ValidationContext collector, final boolean document,
        final Map<String, ModelDescription> mm) {
    final ImmutableList<ISyntacticElement> models = ImmutableList.copyOf(allModels);
    final TOrderedHashMap<String, ISyntacticElement> speciesNodes = new TOrderedHashMap();
    final TOrderedHashMap<String, TOrderedHashMap<String, ISyntacticElement>>[] experimentNodes = new TOrderedHashMap[1];
    final ISyntacticElement globalNodes = SyntacticFactory.create(GLOBAL, (EObject) null, true);
    final ISyntacticElement source = models.get(0);
    Facets globalFacets = null;
    if (source.hasFacet(IKeyword.PRAGMA)) {
        final Facets facets = source.copyFacets(null);
        final List<String> pragmas = (List<String>) facets.get(IKeyword.PRAGMA).getExpression().getConstValue();
        collector.resetInfoAndWarning();
        if (pragmas != null) {
            if (pragmas.contains(IKeyword.NO_INFO)) {
                collector.setNoInfo();
            }
            if (pragmas.contains(IKeyword.NO_WARNING)) {
                collector.setNoWarning();
            }
            if (pragmas.contains(IKeyword.NO_EXPERIMENT)) {
                collector.setNoExperiment();
            }
        }
    }
    final Map<String, SpeciesDescription> tempSpeciesCache = new THashMap<>();
    for (final ISyntacticElement cm : models.reverse()) {
        final SyntacticModelElement currentModel = (SyntacticModelElement) cm;
        if (currentModel != null) {
            if (currentModel.hasFacets()) {
                if (globalFacets == null) {
                    globalFacets = new Facets(currentModel.copyFacets(null));
                } else {
                    globalFacets.putAll(currentModel.copyFacets(null));
                }
            }
            currentModel.visitChildren(element -> globalNodes.addChild(element));
            SyntacticVisitor visitor = element -> addSpeciesNode(element, speciesNodes, collector);
            currentModel.visitSpecies(visitor);
            // We input the species so that grids are always the last ones
            // (see DiffusionStatement)
            currentModel.visitGrids(visitor);
            visitor = element -> {
                if (experimentNodes[0] == null) {
                    experimentNodes[0] = new TOrderedHashMap();
                }
                addExperimentNode(element, currentModel.getName(), experimentNodes[0], collector);
            };
            currentModel.visitExperiments(visitor);
        }
    }
    final String modelName = buildModelName(source.getName());
    // We build a list of working paths from which the composite model will
    // be able to look for resources. These working paths come from the
    // imported models
    Set<String> absoluteAlternatePathAsStrings = models.isEmpty() ? null
            : ImmutableSet.copyOf(
                    Iterables.transform(models.reverse(), each -> ((SyntacticModelElement) each).getPath()));
    if (mm != null) {
        for (final ModelDescription m1 : mm.values()) {
            for (final String im : m1.getAlternatePaths()) {
                absoluteAlternatePathAsStrings =
                        Sets.union(absoluteAlternatePathAsStrings, Collections.singleton(im));
            }
        }
    }
    final ModelDescription model = new ModelDescription(modelName, null, projectPath, modelPath,
            source.getElement(), null, ModelDescription.ROOT, null, globalFacets, collector,
            absoluteAlternatePathAsStrings);
    final Collection<String> allModelNames = models.size() == 1 ? null
            : ImmutableSet.copyOf(
                    Iterables.transform(Iterables.skip(models, 1), each -> buildModelName(each.getName())));
    model.setImportedModelNames(allModelNames);
    model.isDocumenting(document);
    // hqnghi add micro-models
    if (mm != null) {
        // model.setMicroModels(mm);
        model.addChildren(mm.values());
    }
    // end-hqnghi
    // recursively add user-defined species to world and down on to the
    // hierarchy
    speciesNodes.forEachValue(speciesNode -> {
        addMicroSpecies(model, speciesNode, tempSpeciesCache);
        return true;
    });
    if (experimentNodes[0] != null) {
        experimentNodes[0].forEachEntry((s, b) -> {
            b.forEachValue(experimentNode -> {
                addExperiment(s, model, experimentNode, tempSpeciesCache);
                return true;
            });
            return true;
        });
    }
    // Parent the species and the experiments of the model (all are now
    // known).
    speciesNodes.forEachValue(speciesNode -> {
        parentSpecies(model, speciesNode, model, tempSpeciesCache);
        return true;
    });
    if (experimentNodes[0] != null) {
        experimentNodes[0].forEachEntry((s, b) -> {
            b.forEachValue(experimentNode -> {
                parentExperiment(model, experimentNode);
                return true;
            });
            return true;
        });
    }
    // Initialize the hierarchy of types
    model.buildTypes();
    // hqnghi build micro-models as types
    if (mm != null) {
        for (final Entry<String, ModelDescription> entry : mm.entrySet()) {
            model.getTypesManager().alias(entry.getValue().getName(), entry.getKey());
        }
        // end-hqnghi
    }
    // Make species and experiments recursively create their attributes,
    // actions....
    complementSpecies(model, globalNodes);
    speciesNodes.forEachValue(speciesNode -> {
        complementSpecies(model.getMicroSpecies(speciesNode.getName()), speciesNode);
        return true;
    });
    if (experimentNodes[0] != null) {
        experimentNodes[0].forEachEntry((s, b) -> {
            b.forEachValue(experimentNode -> {
                complementSpecies(model.getExperiment(experimentNode.getName()), experimentNode);
                return true;
            });
            return true;
        });
    }
    // Complement recursively the different species (incl. the world). The
    // recursion is hierarchical
    model.inheritFromParent();
    for (final SpeciesDescription sd : getSpeciesInHierarchicalOrder(model)) {
        sd.inheritFromParent();
        if (sd.isExperiment()) {
            if (!sd.finalizeDescription()) {
                return null;
            }
        }
    }
    // Issue #1708 (put before the finalization)
    if (model.hasFacet(SCHEDULES) || model.hasFacet(FREQUENCY)) {
        createSchedulerSpecies(model);
    }
    if (!model.finalizeDescription()) {
        return null;
    }
    if (document) {
        collector.document(model);
    }
    return model;
}
From source file:org.apache.mahout.knn.cluster.BallKMeans.java
/**
 * Selects some of the original points according to the k-means++ algorithm. The basic idea is that
 * points are selected with probability proportional to their distance from any selected point. In
 * this version, points have weights which multiply their likelihood of being selected. This is the
 * same as if there were as many copies of the same point as indicated by the weight.
 * <p/>
 * This is pretty expensive, but it vastly improves the quality and convergence of the k-means algorithm.
 * The basic idea can be made much faster by only processing a random subset of the original points.
 * In the context of streaming k-means, the total number of possible seeds will be about k log n so this
 * selection will cost O(k^2 (log n)^2) which isn't much worse than the random sampling idea. At
 * n = 10^9, the cost of this initialization will be about 10x worse than a reasonable random sampling
 * implementation.
 * <p/>
 * The side effect of this method is to fill the centroids structure itself.
 *
 * @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
 */
private void initializeSeeds(List<? extends WeightedVector> datapoints) {
    Preconditions.checkArgument(datapoints.size() > 1,
            "Must have at least two datapoints to cluster sensibly");
    // Compute the centroid of all of the datapoints. This is then used to compute the squared radius of the datapoints.
    Centroid center = new Centroid(datapoints.iterator().next());
    for (WeightedVector row : Iterables.skip(datapoints, 1)) {
        center.update(row);
    }
    // Given the centroid, we can compute \Delta_1^2(X), the total squared distance for the datapoints;
    // this accelerates seed selection.
    double radius = 0;
    DistanceMeasure l2 = new SquaredEuclideanDistanceMeasure();
    for (WeightedVector row : datapoints) {
        radius += l2.distance(row, center);
    }
    // Find the first seed c_1 (and conceptually the second, c_2) as might be done in the 2-means clustering so that
    // the probability of selecting c_1 and c_2 is proportional to || c_1 - c_2 ||^2. This is done
    // by first selecting c_1 with probability:
    //
    //     p(c_1) = sum_{c_2} || c_1 - c_2 ||^2 / sum_{c_1, c_2} || c_1 - c_2 ||^2
    //
    // This can be simplified to:
    //
    //     p(c_1) = (\Delta_1^2(X) + n || c_1 - c ||^2) / (2 n \Delta_1^2(X))
    //
    // where c = \sum x / n and \Delta_1^2(X) = sum || x - c ||^2
    //
    // All subsequent seeds c_i (including c_2) can then be selected from the remaining points with probability
    // proportional to Pr(c_i == x_j) = min_{m < i} || c_m - x_j ||^2.

    // Multinomial distribution of vector indices for the selection seeds. These correspond to
    // the indices of the vectors in the original datapoints list.
    Multinomial<Integer> seedSelector = new Multinomial<Integer>();
    for (int i = 0; i < datapoints.size(); ++i) {
        double selectionProbability = radius + datapoints.size() * l2.distance(datapoints.get(i), center);
        seedSelector.add(i, selectionProbability);
    }
    Centroid c_1 = new Centroid(datapoints.get(seedSelector.sample()).clone());
    c_1.setIndex(0);
    // Construct a set of weighted things which can be used for random selection. Initial weights are
    // set to the squared distance from c_1.
    for (int i = 0; i < datapoints.size(); ++i) {
        WeightedVector row = datapoints.get(i);
        final double w = l2.distance(c_1, row) * row.getWeight();
        seedSelector.set(i, w);
    }
    // From here, seeds are selected with probability proportional to:
    //
    //     r_i = min_{c_j} || x_i - c_j ||^2
    //
    // When we only have c_1, we have already set these distances and as we select each new
    // seed, we update the minimum distances.
    centroids.add(c_1);
    int clusterIndex = 1;
    while (centroids.size() < numClusters) {
        // Select according to weights.
        int seedIndex = seedSelector.sample();
        Centroid nextSeed = new Centroid(datapoints.get(seedIndex));
        // (WeightedVector) datapoints.get(seedIndex).clone());
        nextSeed.setIndex(clusterIndex++);
        centroids.add(nextSeed);
        // Don't select this one again.
        seedSelector.delete(seedIndex);
        // Re-weight everything according to the minimum distance to a seed.
        for (int currSeedIndex : seedSelector) {
            WeightedVector curr = datapoints.get(currSeedIndex);
            double newWeight = nextSeed.getWeight() * l2.distance(nextSeed, curr);
            if (newWeight < seedSelector.getWeight(currSeedIndex)) {
                seedSelector.set(currSeedIndex, newWeight);
            }
        }
    }
}
From source file:co.cask.cdap.logging.read.AvroFileReader.java
public Collection<LogEvent> readLogPrev(Location file, Filter logFilter, long fromTimeMs, final int maxEvents) {
    try {
        DataFileReader<GenericRecord> dataFileReader = createReader(file);
        try {
            if (!dataFileReader.hasNext()) {
                return ImmutableList.of();
            }
            GenericRecord datum;
            List<List<LogEvent>> logSegments = Lists.newArrayList();
            int count = 0;
            // Calculate skipLen based on fileLength
            long skipLen = file.length() / 10;
            if (skipLen > DEFAULT_SKIP_LEN) {
                skipLen = DEFAULT_SKIP_LEN;
            } else if (skipLen <= 0) {
                skipLen = DEFAULT_SKIP_LEN;
            }
            List<LogEvent> logSegment = Lists.newArrayList();
            long lastSeekPos;
            long seekPos = file.length();
            while (seekPos > 0) {
                lastSeekPos = seekPos;
                seekPos = seekPos < skipLen ? 0 : seekPos - skipLen;
                dataFileReader.sync(seekPos);
                logSegment = logSegment.isEmpty() ? logSegment : Lists.<LogEvent>newArrayList();
                // read all the elements in the current segment (seekPos up to lastSeekPos)
                while (dataFileReader.hasNext() && !dataFileReader.pastSync(lastSeekPos)) {
                    datum = dataFileReader.next();
                    ILoggingEvent loggingEvent = LoggingEvent.decode(datum);
                    // Stop when reached fromTimeMs
                    if (loggingEvent.getTimeStamp() > fromTimeMs) {
                        break;
                    }
                    if (logFilter.match(loggingEvent)) {
                        ++count;
                        logSegment.add(new LogEvent(loggingEvent,
                                new LogOffset(LogOffset.INVALID_KAFKA_OFFSET, loggingEvent.getTimeStamp())));
                    }
                }
                if (!logSegment.isEmpty()) {
                    logSegments.add(logSegment);
                }
                if (count > maxEvents) {
                    break;
                }
            }
            int skip = count >= maxEvents ? count - maxEvents : 0;
            return Lists.newArrayList(Iterables.skip(Iterables.concat(Lists.reverse(logSegments)), skip));
        } finally {
            try {
                dataFileReader.close();
            } catch (IOException e) {
                LOG.error(String.format("Got exception while closing log file %s", file.toURI()), e);
            }
        }
    } catch (Exception e) {
        LOG.error(String.format("Got exception while reading log file %s", file.toURI()), e);
        throw Throwables.propagate(e);
    }
}
From source file:co.cask.cdap.logging.read.AvroFileLogReader.java
public Collection<LogEvent> readLogPrev(Location file, Filter logFilter, long fromTimeMs, final int maxEvents) {
    try {
        DataFileReader<GenericRecord> dataFileReader = createReader(file);
        try {
            if (!dataFileReader.hasNext()) {
                return ImmutableList.of();
            }
            GenericRecord datum;
            List<List<LogEvent>> logSegments = Lists.newArrayList();
            int count = 0;
            // Calculate skipLen based on fileLength
            long skipLen = file.length() / 10;
            if (skipLen > DEFAULT_SKIP_LEN) {
                skipLen = DEFAULT_SKIP_LEN;
            } else if (skipLen <= 0) {
                skipLen = DEFAULT_SKIP_LEN;
            }
            List<LogEvent> logSegment = Lists.newArrayList();
            long boundaryTimeMs = Long.MAX_VALUE;
            long seekPos = file.length();
            while (seekPos > 0) {
                seekPos = seekPos < skipLen ? 0 : seekPos - skipLen;
                dataFileReader.sync(seekPos);
                logSegment = logSegment.isEmpty() ? logSegment : Lists.<LogEvent>newArrayList();
                long segmentStartTimeMs = Long.MAX_VALUE;
                while (dataFileReader.hasNext()) {
                    datum = dataFileReader.next();
                    ILoggingEvent loggingEvent = LoggingEvent.decode(datum);
                    if (segmentStartTimeMs == Long.MAX_VALUE) {
                        segmentStartTimeMs = loggingEvent.getTimeStamp();
                    }
                    // Stop when reached fromTimeMs, or at the end of current segment.
                    if (loggingEvent.getTimeStamp() > fromTimeMs || loggingEvent.getTimeStamp() >= boundaryTimeMs) {
                        break;
                    }
                    if (logFilter.match(loggingEvent)) {
                        ++count;
                        logSegment.add(new LogEvent(loggingEvent, loggingEvent.getTimeStamp()));
                    }
                }
                boundaryTimeMs = segmentStartTimeMs;
                if (!logSegment.isEmpty()) {
                    logSegments.add(logSegment);
                }
                if (count > maxEvents) {
                    break;
                }
            }
            int skip = count >= maxEvents ? count - maxEvents : 0;
            return Lists.newArrayList(Iterables.skip(Iterables.concat(Lists.reverse(logSegments)), skip));
        } finally {
            try {
                dataFileReader.close();
            } catch (IOException e) {
                LOG.error(String.format("Got exception while closing log file %s", file.toURI()), e);
            }
        }
    } catch (Exception e) {
        LOG.error(String.format("Got exception while reading log file %s", file.toURI()), e);
        throw Throwables.propagate(e);
    }
}
From source file:com.blackducksoftware.bdio.model.ExternalIdentifier.java
@Nullable
public String getSuiteReleaseTag() {
    return Iterables.getFirst(Iterables.skip(getBdSuiteId(), 1), null);
}
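The getFirst/skip combination above is a compact way to say "the second element, or null if there is none". A standalone sketch with an invented ID structure:

import com.google.common.collect.Iterables;
import java.util.Arrays;
import java.util.List;

public class SecondElementDemo {
    public static void main(String[] args) {
        List<String> suiteId = Arrays.asList("component", "releaseTag", "extra");
        // Second element, or null when the iterable has fewer than two elements.
        String releaseTag = Iterables.getFirst(Iterables.skip(suiteId, 1), null);
        System.out.println(releaseTag); // prints "releaseTag"
        System.out.println(Iterables.getFirst(Iterables.skip(Arrays.asList("only"), 1), null)); // prints "null"
    }
}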
From source file:co.cask.cdap.logging.write.LogLocation.java
/**
 * Return closeable iterator of {@link LogEvent}
 * @param logFilter filter for filtering log events
 * @param fromTimeMs start timestamp in millis
 * @param maxEvents max events to return
 * @return closeable iterator of previous log events
 */
@SuppressWarnings("WeakerAccess")
public Collection<LogEvent> readLogPrev(Filter logFilter, long fromTimeMs, final int maxEvents) throws IOException {
    DataFileReader<GenericRecord> dataFileReader = createReader();
    try {
        if (!dataFileReader.hasNext()) {
            return ImmutableList.of();
        }
        List<List<LogEvent>> logSegments = Lists.newArrayList();
        List<LogEvent> logSegment;
        int count = 0;
        // Calculate skipLen based on fileLength
        long length = location.length();
        LOG.trace("Got file length {}", length);
        long skipLen = length / 10;
        if (skipLen > DEFAULT_SKIP_LEN || skipLen <= 0) {
            skipLen = DEFAULT_SKIP_LEN;
        }
        // For open file, endPosition sync marker is unknown so start from file length and read up to the actual EOF
        dataFileReader.sync(length);
        long finalSync = dataFileReader.previousSync();
        logSegment = readToEndSyncPosition(dataFileReader, logFilter, fromTimeMs, -1);
        if (!logSegment.isEmpty()) {
            logSegments.add(logSegment);
            count = count + logSegment.size();
        }
        LOG.trace("Read logevents {} from position {}", count, finalSync);
        long startPosition = finalSync;
        long endPosition = startPosition;
        long currentSync;
        while (startPosition > 0 && count < maxEvents) {
            // Skip to sync position less than current sync position
            startPosition = skipToPosition(dataFileReader, startPosition, endPosition, skipLen);
            currentSync = dataFileReader.previousSync();
            logSegment = readToEndSyncPosition(dataFileReader, logFilter, fromTimeMs, endPosition);
            if (!logSegment.isEmpty()) {
                logSegments.add(logSegment);
                count = count + logSegment.size();
            }
            LOG.trace("Read logevents {} from position {} to endPosition {}", count, currentSync, endPosition);
            endPosition = currentSync;
        }
        int skip = count >= maxEvents ? count - maxEvents : 0;
        return Lists.newArrayList(Iterables.skip(Iterables.concat(Lists.reverse(logSegments)), skip));
    } finally {
        try {
            dataFileReader.close();
        } catch (IOException e) {
            LOG.error("Got exception while closing log file {}", location, e);
        }
    }
}
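All three readLogPrev variants above finish with the same trimming step. Stripped of the Avro reading, the idiom looks like the sketch below; the segment contents, count, and maxEvents value are invented for illustration.

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import java.util.List;

public class TailTrimDemo {
    public static void main(String[] args) {
        int maxEvents = 3;
        // Segments are collected while scanning the file backwards, so the newest
        // segment is added first and the oldest last.
        List<List<String>> logSegments = Lists.newArrayList();
        logSegments.add(Lists.newArrayList("e5", "e6")); // newest events, read first
        logSegments.add(Lists.newArrayList("e3", "e4"));
        logSegments.add(Lists.newArrayList("e1", "e2")); // oldest events, read last
        int count = 6;
        // Reverse the segment order to get chronological order, flatten, then drop
        // the oldest events so at most maxEvents of the newest remain.
        int skip = count >= maxEvents ? count - maxEvents : 0;
        List<String> result = Lists.newArrayList(
                Iterables.skip(Iterables.concat(Lists.reverse(logSegments)), skip));
        System.out.println(result); // [e4, e5, e6]
    }
}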
From source file:org.apache.mahout.knn.cluster.StreamingKMeans.java
private UpdatableSearcher clusterInternal(Iterable<Centroid> datapoints, boolean collapseClusters) {
    int oldNumProcessedDataPoints = numProcessedDatapoints;
    // We clear the centroids we have in case of cluster collapse, the old clusters are the
    // datapoints but we need to re-cluster them.
    if (collapseClusters) {
        centroids.clear();
        numProcessedDatapoints = 0;
    }
    int numCentroidsToSkip = 0;
    if (centroids.size() == 0) {
        // Assign the first datapoint to the first cluster.
        // Adding a vector to a searcher would normally just reference the copy,
        // but we could potentially mutate it and so we need to make a clone.
        centroids.add(Iterables.get(datapoints, 0).clone());
        numCentroidsToSkip = 1;
        ++numProcessedDatapoints;
    }
    Random rand = RandomUtils.getRandom();
    // To cluster, we scan the data and either add each point to the nearest group or create a new group.
    // When we get too many groups, we need to increase the threshold and rescan our current groups.
    for (WeightedVector row : Iterables.skip(datapoints, numCentroidsToSkip)) {
        // Get the closest vector and its weight as a WeightedThing<Vector>.
        // The weight of the WeightedThing is the distance to the query and the value is a
        // reference to one of the vectors we added to the searcher previously.
        WeightedThing<Vector> closestPair = centroids.search(row, 1).get(0);
        // We get a uniformly distributed random number between 0 and 1 and compare it with the
        // distance to the closest cluster divided by the distanceCutoff.
        // This is so that if the closest cluster is further than distanceCutoff,
        // closestPair.getWeight() / distanceCutoff > 1 which will trigger the creation of a new
        // cluster anyway.
        // However, if the ratio is less than 1, we want to create a new cluster with probability
        // proportional to the distance to the closest cluster.
        if (rand.nextDouble() < closestPair.getWeight() / distanceCutoff) {
            // Add new centroid, note that the vector is copied because we may mutate it later.
            centroids.add(row.clone());
        } else {
            // Merge the new point with the existing centroid. This will update the centroid's actual
            // position.
            // We know that all the points we inserted in the centroids searcher are (or extend)
            // WeightedVector, so the cast will always succeed.
            Centroid centroid = (Centroid) closestPair.getValue();
            // We will update the centroid by removing it from the searcher and reinserting it to
            // ensure consistency.
            if (!centroids.remove(centroid, 1e-7)) {
                throw new RuntimeException("Unable to remove centroid");
            }
            centroid.update(row);
            centroids.add(centroid);
        }
        progressLogger.debug(
                "numProcessedDataPoints: {}, estimatedNumClusters: {}, distanceCutoff: {}, numCentroids: {}",
                numProcessedDatapoints, estimatedNumClusters, distanceCutoff, centroids.size());
        if (!collapseClusters && centroids.size() > estimatedNumClusters) {
            estimatedNumClusters = (int) Math.max(estimatedNumClusters,
                    clusterLogFactor * Math.log(numProcessedDatapoints));
            // TODO does shuffling help?
            List<Centroid> shuffled = Lists.newArrayList();
            for (Vector v : centroids) {
                shuffled.add((Centroid) v);
            }
            Collections.shuffle(shuffled);
            // Re-cluster using the shuffled centroids as data points. The centroids member variable
            // is modified directly.
            clusterInternal(shuffled, true);
            // In the original algorithm, with distributions with sharp scale effects, the
            // distanceCutoff can grow to excessive size leading sub-clustering to collapse
            // the centroids set too much. This test prevents increase in distanceCutoff if
            // the current value is doing well at collapsing the clusters.
            if (centroids.size() > clusterOvershoot * estimatedNumClusters) {
                distanceCutoff *= beta;
            }
        }
        ++numProcessedDatapoints;
    }
    if (collapseClusters) {
        numProcessedDatapoints = oldNumProcessedDataPoints;
    }
    // Normally, iterating through the searcher produces Vectors,
    // but since we always used Centroids, we adapt the return type.
    return centroids;
}