List of usage examples for java.util.Collections.shuffle(List, Random)
@SuppressWarnings({ "rawtypes", "unchecked" }) public static void shuffle(List<?> list, Random rnd)
From source file:org.elasticsearch.discovery.ec2.Ec2DiscoveryClusterFormationTests.java
/**
 * Creates mock EC2 endpoint providing the list of started nodes to the DescribeInstances API call
 */
@BeforeClass
public static void startHttpd() throws Exception {
    logDir = createTempDir();
    httpServer = MockHttpServer
            .createHttp(new InetSocketAddress(InetAddress.getLoopbackAddress().getHostAddress(), 0), 0);

    httpServer.createContext("/", (s) -> {
        Headers headers = s.getResponseHeaders();
        headers.add("Content-Type", "text/xml; charset=UTF-8");
        String action = null;
        for (NameValuePair parse : URLEncodedUtils.parse(IOUtils.toString(s.getRequestBody()),
                StandardCharsets.UTF_8)) {
            if ("Action".equals(parse.getName())) {
                action = parse.getValue();
                break;
            }
        }
        assertThat(action, equalTo("DescribeInstances"));

        XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newFactory();
        xmlOutputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, true);
        StringWriter out = new StringWriter();
        XMLStreamWriter sw;
        try {
            sw = xmlOutputFactory.createXMLStreamWriter(out);
            sw.writeStartDocument();

            String namespace = "http://ec2.amazonaws.com/doc/2013-02-01/";
            sw.setDefaultNamespace(namespace);
            sw.writeStartElement(XMLConstants.DEFAULT_NS_PREFIX, "DescribeInstancesResponse", namespace);
            {
                sw.writeStartElement("requestId");
                sw.writeCharacters(UUID.randomUUID().toString());
                sw.writeEndElement();

                sw.writeStartElement("reservationSet");
                {
                    Path[] files = FileSystemUtils.files(logDir);
                    for (int i = 0; i < files.length; i++) {
                        Path resolve = files[i].resolve("transport.ports");
                        if (Files.exists(resolve)) {
                            List<String> addresses = Files.readAllLines(resolve);
                            Collections.shuffle(addresses, random());

                            sw.writeStartElement("item");
                            {
                                sw.writeStartElement("reservationId");
                                sw.writeCharacters(UUID.randomUUID().toString());
                                sw.writeEndElement();

                                sw.writeStartElement("instancesSet");
                                {
                                    sw.writeStartElement("item");
                                    {
                                        sw.writeStartElement("instanceId");
                                        sw.writeCharacters(UUID.randomUUID().toString());
                                        sw.writeEndElement();

                                        sw.writeStartElement("imageId");
                                        sw.writeCharacters(UUID.randomUUID().toString());
                                        sw.writeEndElement();

                                        sw.writeStartElement("instanceState");
                                        {
                                            sw.writeStartElement("code");
                                            sw.writeCharacters("16");
                                            sw.writeEndElement();

                                            sw.writeStartElement("name");
                                            sw.writeCharacters("running");
                                            sw.writeEndElement();
                                        }
                                        sw.writeEndElement();

                                        sw.writeStartElement("privateDnsName");
                                        sw.writeCharacters(addresses.get(0));
                                        sw.writeEndElement();

                                        sw.writeStartElement("dnsName");
                                        sw.writeCharacters(addresses.get(0));
                                        sw.writeEndElement();

                                        sw.writeStartElement("instanceType");
                                        sw.writeCharacters("m1.medium");
                                        sw.writeEndElement();

                                        sw.writeStartElement("placement");
                                        {
                                            sw.writeStartElement("availabilityZone");
                                            sw.writeCharacters("use-east-1e");
                                            sw.writeEndElement();

                                            sw.writeEmptyElement("groupName");

                                            sw.writeStartElement("tenancy");
                                            sw.writeCharacters("default");
                                            sw.writeEndElement();
                                        }
                                        sw.writeEndElement();

                                        sw.writeStartElement("privateIpAddress");
                                        sw.writeCharacters(addresses.get(0));
                                        sw.writeEndElement();

                                        sw.writeStartElement("ipAddress");
                                        sw.writeCharacters(addresses.get(0));
                                        sw.writeEndElement();
                                    }
                                    sw.writeEndElement();
                                }
                                sw.writeEndElement();
                            }
                            sw.writeEndElement();
                        }
                    }
                }
                sw.writeEndElement();
            }
            sw.writeEndElement();

            sw.writeEndDocument();
            sw.flush();

            final byte[] responseAsBytes = out.toString().getBytes(StandardCharsets.UTF_8);
            s.sendResponseHeaders(200, responseAsBytes.length);
            OutputStream responseBody = s.getResponseBody();
            responseBody.write(responseAsBytes);
            responseBody.close();
        } catch (XMLStreamException e) {
            Loggers.getLogger(Ec2DiscoveryClusterFormationTests.class).error("Failed serializing XML", e);
            throw new RuntimeException(e);
        }
    });
    httpServer.start();
}
From source file:de.julielab.jtbd.TokenizerApplication.java
/**
 * Perform cross validation.
 *
 * @param n number of splits
 * @param orgSentencesFile
 * @param tokSentencesFile
 * @param errors
 * @param predictions
 * @return
 */
private static double doCrossEvaluation(final int n, final File orgSentencesFile, final File tokSentencesFile,
        final ArrayList<String> errors, final ArrayList<String> predictions) {
    final ArrayList<String> orgSentences = readFile(orgSentencesFile);
    final ArrayList<String> tokSentences = readFile(tokSentencesFile);

    // shuffle both files with the same seed so that lines stay aligned
    final long seed = 1;
    Collections.shuffle(orgSentences, new Random(seed));
    Collections.shuffle(tokSentences, new Random(seed));

    int pos = 0;
    final int sizeRound = orgSentences.size() / n;
    final int sizeAll = orgSentences.size();
    final int sizeLastRound = sizeRound + (sizeAll % n);
    System.out.println("number of files in directory: " + sizeAll);
    System.out.println("size of each/last round: " + sizeRound + "/" + sizeLastRound);
    System.out.println();

    final EvalResult[] er = new EvalResult[n];
    double avgAcc = 0;
    double avgF = 0;

    for (int i = 0; i < n; i++) {
        // in each round
        final ArrayList<String> predictOrgSentences = new ArrayList<String>();
        final ArrayList<String> predictTokSentences = new ArrayList<String>();
        final ArrayList<String> trainOrgSentences = new ArrayList<String>();
        final ArrayList<String> trainTokSentences = new ArrayList<String>();

        if (i == (n - 1)) {
            // last round
            for (int j = 0; j < orgSentences.size(); j++)
                if (j < pos) {
                    trainOrgSentences.add(orgSentences.get(j));
                    trainTokSentences.add(tokSentences.get(j));
                } else {
                    predictOrgSentences.add(orgSentences.get(j));
                    predictTokSentences.add(tokSentences.get(j));
                }
        } else {
            // other rounds
            for (int j = 0; j < orgSentences.size(); j++)
                if ((j < pos) || (j >= (pos + sizeRound))) {
                    // System.out.println(j + " - add to train");
                    trainOrgSentences.add(orgSentences.get(j));
                    trainTokSentences.add(tokSentences.get(j));
                } else {
                    predictOrgSentences.add(orgSentences.get(j));
                    predictTokSentences.add(tokSentences.get(j));
                }
            pos += sizeRound;
        }

        // now evaluate for this round
        System.out.println("training size: " + trainOrgSentences.size());
        System.out.println("prediction size: " + predictOrgSentences.size());
        er[i] = doEvaluation(trainOrgSentences, trainTokSentences, predictOrgSentences, predictTokSentences,
                predictions, errors);
    }

    final DecimalFormat df = new DecimalFormat("0.000");
    for (int i = 0; i < er.length; i++) {
        avgAcc += er[i].ACC;
        avgF += er[i].getF();
        System.out.println("ACC in round " + i + ": " + df.format(er[i].ACC));
    }
    avgAcc = avgAcc / n;
    avgF = avgF / n;

    System.out.println("\n\n------------------------------------");
    System.out.println("avg accuracy: " + df.format(avgAcc));
    System.out.println("avg F-score: " + df.format(avgF));
    System.out.println("------------------------------------");
    return avgAcc;
}
From source file:ldbc.snb.datagen.generator.BTERKnowsGenerator.java
private void generateRemainingEdges() {
    // create one stub per missing edge endpoint
    LinkedList<Integer> stubs = new LinkedList<Integer>();
    for (int i = 0; i < graphSize; ++i) {
        long difference = expectedDegree[i] - adjacencyMatrix[i].getCardinality();
        if (difference > 0) {
            for (int j = 0; j < difference; ++j) {
                stubs.add(i);
            }
        }
    }
    // randomize the stub order, then pair consecutive stubs into edges
    Collections.shuffle(stubs, random);
    while (!stubs.isEmpty()) {
        int node1 = stubs.get(0);
        stubs.remove(0);
        if (!stubs.isEmpty()) {
            int node2 = stubs.get(0);
            stubs.remove(0);
            if (node1 != node2) {
                adjacencyMatrix[node1].add(node2);
                adjacencyMatrix[node2].add(node1);
            }
        }
    }
}
From source file:com.linkedin.pinot.common.query.gen.AvroQueryGenerator.java
public List<TestSimpleAggreationQuery> giveMeNSimpleAggregationQueries(int n) {
    Collections.shuffle(aggregationQueries, new Random(System.currentTimeMillis()));
    if (n <= aggregationQueries.size()) {
        return aggregationQueries.subList(0, n);
    }
    return aggregationQueries;
}
From source file:net.pms.util.UMSUtils.java
public static void sort(List<File> files, int method) {
    switch (method) {
    case SORT_NO_SORT: // no sorting
        break;
    case SORT_LOC_NAT: // Locale-sensitive natural sort
        Collections.sort(files, new Comparator<File>() {
            @Override
            public int compare(File f1, File f2) {
                String filename1ToSort = FileUtil.renameForSorting(f1.getName());
                String filename2ToSort = FileUtil.renameForSorting(f2.getName());
                return NaturalComparator.compareNatural(collator, filename1ToSort, filename2ToSort);
            }
        });
        break;
    case SORT_INS_ASCII: // Case-insensitive ASCIIbetical sort
        Collections.sort(files, new Comparator<File>() {
            @Override
            public int compare(File f1, File f2) {
                String filename1ToSort = FileUtil.renameForSorting(f1.getName());
                String filename2ToSort = FileUtil.renameForSorting(f2.getName());
                return filename1ToSort.compareToIgnoreCase(filename2ToSort);
            }
        });
        break;
    case SORT_MOD_OLD: // Sort by modified date, oldest first
        Collections.sort(files, new Comparator<File>() {
            @Override
            public int compare(File f1, File f2) {
                return Long.valueOf(f1.lastModified()).compareTo(f2.lastModified());
            }
        });
        break;
    case SORT_MOD_NEW: // Sort by modified date, newest first
        Collections.sort(files, new Comparator<File>() {
            @Override
            public int compare(File f1, File f2) {
                return Long.valueOf(f2.lastModified()).compareTo(f1.lastModified());
            }
        });
        break;
    case SORT_RANDOM: // Random
        Collections.shuffle(files, new Random(System.currentTimeMillis()));
        break;
    case SORT_LOC_SENS: // Same as default
    default: // Locale-sensitive A-Z
        Collections.sort(files, new Comparator<File>() {
            @Override
            public int compare(File f1, File f2) {
                String filename1ToSort = FileUtil.renameForSorting(f1.getName());
                String filename2ToSort = FileUtil.renameForSorting(f2.getName());
                return collator.compare(filename1ToSort, filename2ToSort);
            }
        });
        break;
    }
}
From source file:CV.java
public CV(AbstractCELA la, AbstractLearningProblem lp, final AbstractReasonerComponent rs, int folds,
        boolean leaveOneOut) {

    // console rendering of class expressions
    ManchesterOWLSyntaxOWLObjectRendererImpl renderer = new ManchesterOWLSyntaxOWLObjectRendererImpl();
    ToStringRenderer.getInstance().setRenderer(renderer);
    ToStringRenderer.getInstance().setShortFormProvider(new SimpleShortFormProvider());

    // the training and test sets used later on
    List<Set<OWLIndividual>> trainingSetsPos = new LinkedList<Set<OWLIndividual>>();
    List<Set<OWLIndividual>> trainingSetsNeg = new LinkedList<Set<OWLIndividual>>();
    List<Set<OWLIndividual>> testSetsPos = new LinkedList<Set<OWLIndividual>>();
    List<Set<OWLIndividual>> testSetsNeg = new LinkedList<Set<OWLIndividual>>();

    // get examples and shuffle them too
    Set<OWLIndividual> posExamples;
    Set<OWLIndividual> negExamples;
    if (lp instanceof PosNegLP) {
        posExamples = OWLAPIConverter.getOWLAPIIndividuals(((PosNegLP) lp).getPositiveExamples());
        negExamples = OWLAPIConverter.getOWLAPIIndividuals(((PosNegLP) lp).getNegativeExamples());
    } else if (lp instanceof PosOnlyLP) {
        posExamples = OWLAPIConverter.getOWLAPIIndividuals(((PosOnlyLP) lp).getPositiveExamples());
        negExamples = new HashSet<OWLIndividual>();
    } else {
        throw new IllegalArgumentException("Only PosNeg and PosOnly learning problems are supported");
    }
    List<OWLIndividual> posExamplesList = new LinkedList<OWLIndividual>(posExamples);
    List<OWLIndividual> negExamplesList = new LinkedList<OWLIndividual>(negExamples);
    Collections.shuffle(posExamplesList, new Random(1));
    Collections.shuffle(negExamplesList, new Random(2));

    // sanity check whether nr. of folds makes sense for this benchmark
    if (!leaveOneOut && (posExamples.size() < folds && negExamples.size() < folds)) {
        System.out.println("The number of folds is higher than the number of "
                + "positive/negative examples. This can result in empty test sets. Exiting.");
        System.exit(0);
    }

    if (leaveOneOut) {
        // note that leave-one-out is not identical to k-fold with
        // k = nr. of examples in the current implementation, because
        // with n folds and n examples there is no guarantee that a fold
        // is never empty (this is an implementation issue)
        int nrOfExamples = posExamples.size() + negExamples.size();
        for (int i = 0; i < nrOfExamples; i++) {
            // ...
        }
        System.out.println("Leave-one-out not supported yet.");
        System.exit(1);
    } else {
        // calculating where to split the sets; note that we split
        // positive and negative examples separately such that the
        // distribution of positive and negative examples remains similar
        // (note that there are better but more complex ways to implement this,
        // which guarantee that the sum of the elements of a fold for pos
        // and neg differs by at most 1 - it can differ by 2 in our implementation,
        // e.g. with 3 folds, 4 pos. examples, 4 neg. examples)
        int[] splitsPos = calculateSplits(posExamples.size(), folds);
        int[] splitsNeg = calculateSplits(negExamples.size(), folds);
        // System.out.println(splitsPos[0]);
        // System.out.println(splitsNeg[0]);

        // calculating training and test sets
        for (int i = 0; i < folds; i++) {
            Set<OWLIndividual> testPos = getTestingSet(posExamplesList, splitsPos, i);
            Set<OWLIndividual> testNeg = getTestingSet(negExamplesList, splitsNeg, i);
            testSetsPos.add(i, testPos);
            testSetsNeg.add(i, testNeg);
            trainingSetsPos.add(i, getTrainingSet(posExamples, testPos));
            trainingSetsNeg.add(i, getTrainingSet(negExamples, testNeg));
        }
    }

    // run the algorithm
    if (multiThreaded && lp instanceof Cloneable && la instanceof Cloneable) {
        ExecutorService es = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1);
        for (int currFold = 0; currFold < folds; currFold++) {
            try {
                final AbstractLearningProblem lpClone = (AbstractLearningProblem) lp.getClass()
                        .getMethod("clone").invoke(lp);
                final Set<OWLIndividual> trainPos = trainingSetsPos.get(currFold);
                final Set<OWLIndividual> trainNeg = trainingSetsNeg.get(currFold);
                final Set<OWLIndividual> testPos = testSetsPos.get(currFold);
                final Set<OWLIndividual> testNeg = testSetsNeg.get(currFold);
                if (lp instanceof PosNegLP) {
                    ((PosNegLP) lpClone).setPositiveExamples(OWLAPIConverter.convertIndividuals(trainPos));
                    ((PosNegLP) lpClone).setNegativeExamples(OWLAPIConverter.convertIndividuals(trainNeg));
                } else if (lp instanceof PosOnlyLP) {
                    ((PosOnlyLP) lpClone).setPositiveExamples(
                            new TreeSet<Individual>(OWLAPIConverter.convertIndividuals(trainPos)));
                }
                final AbstractCELA laClone = (AbstractCELA) la.getClass().getMethod("clone").invoke(la);
                final int i = currFold;
                es.submit(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            validate(laClone, lpClone, rs, i, trainPos, trainNeg, testPos, testNeg);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    }
                });
            } catch (IllegalAccessException e) {
                e.printStackTrace();
            } catch (IllegalArgumentException e) {
                e.printStackTrace();
            } catch (InvocationTargetException e) {
                e.printStackTrace();
            } catch (NoSuchMethodException e) {
                e.printStackTrace();
            } catch (SecurityException e) {
                e.printStackTrace();
            }
        }
        es.shutdown();
        try {
            es.awaitTermination(1, TimeUnit.DAYS);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    } else {
        for (int currFold = 0; currFold < folds; currFold++) {
            final Set<OWLIndividual> trainPos = trainingSetsPos.get(currFold);
            final Set<OWLIndividual> trainNeg = trainingSetsNeg.get(currFold);
            final Set<OWLIndividual> testPos = testSetsPos.get(currFold);
            final Set<OWLIndividual> testNeg = testSetsNeg.get(currFold);
            if (lp instanceof PosNegLP) {
                ((PosNegLP) lp).setPositiveExamples(OWLAPIConverter.convertIndividuals(trainPos));
                ((PosNegLP) lp).setNegativeExamples(OWLAPIConverter.convertIndividuals(trainNeg));
            } else if (lp instanceof PosOnlyLP) {
                Set<Individual> convertIndividuals = OWLAPIConverter.convertIndividuals(trainPos);
                ((PosOnlyLP) lp).setPositiveExamples(new TreeSet<Individual>(convertIndividuals));
            }
            validate(la, lp, rs, currFold, trainPos, trainNeg, testPos, testNeg);
        }
    }

    outputWriter("");
    outputWriter("Finished " + folds + "-folds cross-validation.");
    outputWriter("runtime: " + statOutput(df, runtime, "s"));
    outputWriter("length: " + statOutput(df, length, ""));
    outputWriter("F-Measure on training set: " + statOutput(df, fMeasureTraining, "%"));
    outputWriter("F-Measure: " + statOutput(df, fMeasure, "%"));
    outputWriter("predictive accuracy on training set: " + statOutput(df, accuracyTraining, "%"));
    outputWriter("predictive accuracy: " + statOutput(df, accuracy, "%"));
}
From source file:coral.utils.Matching.java
/**
 * Shuffle the array; for N == 2 this will produce alternating behaviour.
 *
 * @param in
 * @param seed
 * @return
 */
public static Integer[] shuffle(Integer[] in, long seed) {
    int n = in.length;
    Integer[] out;
    // Arrays.asList returns a list view backed by the input array
    List<Integer> l = Arrays.asList(in);
    Collections.shuffle(l, new Random((seed * 48271) % 2147483647));
    out = l.toArray(new Integer[n]);
    return out;
}
From source file:com.linkedin.pinot.common.query.gen.AvroQueryGenerator.java
public List<TestGroupByAggreationQuery> giveMeNGroupByAggregationQueries(int n) {
    Collections.shuffle(groupByQueries, new Random(System.currentTimeMillis()));
    if (n <= groupByQueries.size()) {
        return groupByQueries.subList(0, n);
    }
    return groupByQueries;
}
From source file:andromache.hadoop.CassandraInputFormat.java
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = CassandraConfigHelper.getInputKeyspace(context.getConfiguration());
    cfNames = CassandraConfigHelper.getInputColumnFamilies(context.getConfiguration());

    // TODO: [IS] make sure this partitioner matches what is set on each participating keyspace
    partitioner = CassandraConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<CassandraSplit>>> splitfutures = new ArrayList<Future<List<CassandraSplit>>>();
        KeyRange jobKeyRange = CassandraConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner
                    .preservesOrder() : "ConfigHelper.setInputKeyRange(..) can only be used with an order preserving partitioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<CassandraSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}