List of usage examples for java.util.BitSet.cardinality()
public int cardinality()
Returns the number of bits set to true in this BitSet.
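Before the longer examples, a minimal self-contained sketch of the behavior (the class name CardinalityDemo is ours, for illustration):

import java.util.BitSet;

public class CardinalityDemo {
    public static void main(String[] args) {
        BitSet bits = new BitSet(64); // the argument is only a capacity hint
        bits.set(3);
        bits.set(10);
        bits.set(40);
        System.out.println(bits.cardinality()); // prints 3
        bits.clear(10);
        System.out.println(bits.cardinality()); // prints 2
    }
}

Note that cardinality() tracks set bits as they change; it is distinct from size() (the space the set currently occupies) and length() (the index of the highest set bit plus one).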
From source file:cascading.tap.hadoop.ZipInputFormatTest.java
public void testSplits() throws Exception {
    JobConf job = new JobConf();
    FileSystem currentFs = FileSystem.get(job);

    Path file = new Path(workDir, "test.zip");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);

    FileInputFormat.setInputPaths(job, file);

    for (int entries = 1; entries < MAX_ENTRIES; entries += random.nextInt(MAX_ENTRIES / 10) + 1) {
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        ZipOutputStream zos = new ZipOutputStream(byteArrayOutputStream);
        long length = 0;

        LOG.debug("creating; zip file with entries = " + entries);

        // for each entry in the zip file
        for (int entryCounter = 0; entryCounter < entries; entryCounter++) {
            // construct zip entries splitting MAX_LENGTH between entries
            long entryLength = MAX_LENGTH / entries;
            ZipEntry zipEntry = new ZipEntry("/entry" + entryCounter + ".txt");
            zipEntry.setMethod(ZipEntry.DEFLATED);
            zos.putNextEntry(zipEntry);

            for (length = entryCounter * entryLength; length < (entryCounter + 1) * entryLength; length++) {
                zos.write(Long.toString(length).getBytes());
                zos.write("\n".getBytes());
            }

            zos.flush();
            zos.closeEntry();
        }

        zos.flush();
        zos.close();

        currentFs.delete(file, true);

        OutputStream outputStream = currentFs.create(file);
        byteArrayOutputStream.writeTo(outputStream);
        outputStream.close();

        ZipInputFormat format = new ZipInputFormat();
        format.configure(job);

        LongWritable key = new LongWritable();
        Text value = new Text();

        InputSplit[] splits = format.getSplits(job, 100);

        BitSet bits = new BitSet((int) length);

        for (int j = 0; j < splits.length; j++) {
            LOG.debug("split[" + j + "]= " + splits[j]);

            RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[j], job, reporter);

            try {
                int count = 0;

                while (reader.next(key, value)) {
                    int v = Integer.parseInt(value.toString());

                    LOG.debug("read " + v);

                    if (bits.get(v))
                        LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());

                    assertFalse("key in multiple partitions.", bits.get(v));

                    bits.set(v);
                    count++;
                }

                LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
            } finally {
                reader.close();
            }
        }

        assertEquals("some keys in no partition.", length, bits.cardinality());
    }
}
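This test uses a verification idiom that recurs in the Hadoop input-format tests below: set one bit per record read, assert with get() that no record repeats across splits, then compare cardinality() against the expected total to catch records that landed in no split. A stripped-down sketch of just that idiom (the class and method names are ours, not from the test):

import java.util.BitSet;

public class CoverageCheck {
    // Verify that record ids 0..expected-1 were each seen exactly once.
    static void verifyCoverage(int[] recordIds, int expected) {
        BitSet seen = new BitSet(expected);
        for (int id : recordIds) {
            if (seen.get(id)) {
                throw new AssertionError("record " + id + " read twice");
            }
            seen.set(id);
        }
        // cardinality() counts distinct ids seen; fewer than expected
        // means at least one record fell into no split.
        if (seen.cardinality() != expected) {
            throw new AssertionError(seen.cardinality() + " of " + expected + " records read");
        }
    }

    public static void main(String[] args) {
        verifyCoverage(new int[] { 0, 1, 2, 3 }, 4); // passes silently
    }
}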
From source file:dr.app.tools.AntigenicPlotter.java
public AntigenicPlotter(int burnin, boolean tabFormat, boolean discreteModel, final String inputFileName,
        final String treeFileName, final String outputFileName) throws IOException {
    double[][] reference = null;
    List<String> tipLabels = null;

    if (treeFileName != null) {
        System.out.println("Reading tree file...");

        NexusImporter importer = new NexusImporter(new FileReader(treeFileName));
        try {
            Tree tree = importer.importNextTree();

            reference = new double[tree.getExternalNodeCount()][2];
            tipLabels = new ArrayList<String>();

            for (int i = 0; i < tree.getExternalNodeCount(); i++) {
                NodeRef tip = tree.getExternalNode(i);
                tipLabels.add(tree.getNodeTaxon(tip).getId());

                reference[i][0] = (Double) tree.getNodeAttribute(tip, "antigenic1");
                reference[i][1] = (Double) tree.getNodeAttribute(tip, "antigenic2");
            }
        } catch (Importer.ImportException e) {
            e.printStackTrace();
            return;
        }
    }

    System.out.println("Reading log file...");

    FileReader fileReader = new FileReader(inputFileName);
    try {
        File file = new File(inputFileName);

        LogFileTraces traces = new LogFileTraces(inputFileName, file);
        traces.loadTraces();

        if (burnin == -1) {
            burnin = (int) (traces.getMaxState() / 10);
        }
        traces.setBurnIn(burnin);

        System.out.println();
        System.out.println("burnIn <= " + burnin);
        System.out.println("maxState = " + traces.getMaxState());
        System.out.println();

        int traceCount = traces.getTraceCount();
        if (discreteModel) {
            // for the discrete model, there are 4 sets of traces:
            // pairs of coordinates, cluster allocations, and cluster sizes
            traceCount /= 4;
        } else {
            // for continuous, just pairs of coordinates
            traceCount /= 2;
        }
        int stateCount = traces.getStateCount();

        double[][][] data;
        String[] labels = new String[traceCount];

        if (tipLabels != null) {
            data = new double[stateCount][tipLabels.size()][2];
        } else {
            data = new double[stateCount][traceCount][2];
        }

        for (int i = 0; i < traceCount; i++) {
            String name = traces.getTraceName(i * 2);
            name = name.substring(0, name.length() - 1);

            if (tipLabels != null) {
                int index = tipLabels.indexOf(name);
                if (index != -1) {
                    for (int j = 0; j < stateCount; j++) {
                        data[j][index][0] = traces.getStateValue(i * 2, j);
                        data[j][index][1] = traces.getStateValue((i * 2) + 1, j);
                    }
                }
            } else {
                for (int j = 0; j < stateCount; j++) {
                    data[j][i][0] = traces.getStateValue(i * 2, j);
                    data[j][i][1] = traces.getStateValue((i * 2) + 1, j);
                }
                labels[i] = name;
            }
        }

        int[][] clusterIndices = null;
        int[][] clusterSizes = null;

        if (discreteModel) {
            clusterIndices = new int[stateCount][traceCount];
            clusterSizes = new int[stateCount][traceCount];

            for (int i = 0; i < traceCount; i++) {
                for (int j = 0; j < stateCount; j++) {
                    clusterIndices[j][i] = (int) traces.getStateValue((traceCount * 2) + i, j);
                    clusterSizes[j][i] = (int) traces.getStateValue((traceCount * 3) + i, j);
                }
            }

            Map<BitSet, Integer> clusterMap = new HashMap<BitSet, Integer>();
            for (int i = 0; i < stateCount; i++) {
                BitSet[] clusters = new BitSet[clusterIndices[i].length];
                for (int j = 0; j < clusterIndices[i].length; j++) {
                    BitSet bits = clusters[clusterIndices[i][j]];
                    if (bits == null) {
                        bits = new BitSet();
                        clusters[clusterIndices[i][j]] = bits;
                    }
                    bits.set(j);

                    Integer count = clusterMap.get(bits);
                    if (count == null) {
                        count = 0;
                    }
                    clusterMap.put(bits, count + 1);
                }

                Arrays.sort(clusters, new Comparator<BitSet>() {
                    public int compare(BitSet bitSet1, BitSet bitSet2) {
                        if (bitSet1 == null) {
                            return -1;
                        }
                        if (bitSet2 == null) {
                            return 1;
                        }
                        return bitSet2.cardinality() - bitSet1.cardinality();
                    }
                });
            }

            for (BitSet bits : clusterMap.keySet()) {
                int count = clusterMap.get(bits);
                if (count > 1) {
                    System.out.print(count);
                    for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                        System.out.print("\t" + labels[i]);
                    }
                    System.out.println();
                }
            }
        }

        if (tipLabels != null) {
            labels = new String[tipLabels.size()];
            tipLabels.toArray(labels);
        }

        if (reference != null) {
            procrustinate(data, reference);
        } else {
            procrustinate(data);
        }

        if (tabFormat) {
            writeTabformat(outputFileName, labels, data);
        } else {
            if (discreteModel) {
                writeKML(outputFileName, labels, data, clusterIndices, clusterSizes);
            } else {
                writeKML(outputFileName, labels, data);
            }
        }
    } catch (Exception e) {
        System.err.println("Error Parsing Input File: " + e.getMessage());
        e.printStackTrace(System.err);
        return;
    }
    fileReader.close();
}
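The cardinality() call in this example lives in the Comparator: each cluster is a BitSet of member indices, and subtracting cardinalities sorts clusters largest-first. A minimal sketch of that idiom, using the Java 8 Comparator form and skipping the original's null handling:

import java.util.Arrays;
import java.util.BitSet;
import java.util.Comparator;

public class ClusterSort {
    public static void main(String[] args) {
        BitSet a = new BitSet(); a.set(0); a.set(3);            // 2 members
        BitSet b = new BitSet(); b.set(1);                      // 1 member
        BitSet c = new BitSet(); c.set(2); c.set(4); c.set(5);  // 3 members

        BitSet[] clusters = { a, b, c };
        // Largest cluster first; cardinality() is the cluster size.
        Arrays.sort(clusters, Comparator.comparingInt(BitSet::cardinality).reversed());
        for (BitSet cluster : clusters) {
            System.out.println(cluster + " size=" + cluster.cardinality());
        }
    }
}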
From source file:juicebox.data.MatrixZoomData.java
/**
 * Computes eigenvector from Pearson's.
 *
 * @param df    Expected values, needed to get Pearson's
 * @param which Which eigenvector; 0 is principal.
 * @return Eigenvector
 */
public double[] computeEigenvector(ExpectedValueFunction df, int which) {
    BasicMatrix pearsons = getPearsons(df);
    if (pearsons == null) {
        return null;
    }

    int dim = pearsons.getRowDimension();
    double[][] data = new double[dim][dim];
    BitSet bitSet = new BitSet(dim);
    for (int i = 0; i < dim; i++) {
        for (int j = 0; j < dim; j++) {
            float tmp = pearsons.getEntry(i, j);
            data[i][j] = tmp;
            if (data[i][j] != 0 && !Float.isNaN(tmp)) {
                bitSet.set(i);
            }
        }
    }

    int[] nonCentromereColumns = new int[bitSet.cardinality()];
    int count = 0;
    for (int i = 0; i < dim; i++) {
        if (bitSet.get(i))
            nonCentromereColumns[count++] = i;
    }

    RealMatrix subMatrix = new Array2DRowRealMatrix(data).getSubMatrix(nonCentromereColumns,
            nonCentromereColumns);
    RealVector rv = (new EigenDecompositionImpl(subMatrix, 0)).getEigenvector(which);

    double[] ev = rv.toArray();

    int size = pearsons.getColumnDimension();
    double[] eigenvector = new double[size];
    int num = 0;
    for (int i = 0; i < size; i++) {
        if (num < nonCentromereColumns.length && i == nonCentromereColumns[num]) {
            eigenvector[i] = ev[num];
            num++;
        } else {
            eigenvector[i] = Double.NaN;
        }
    }
    return eigenvector;
}
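Here cardinality() sizes the nonCentromereColumns array exactly before a second pass fills it in. A compact sketch of that flag-then-compact idiom (the names are ours; the original walks all indices with get(), while this sketch walks only the set bits with nextSetBit()):

import java.util.BitSet;

public class RowFilter {
    // Collect the indices of rows with at least one usable (non-zero,
    // non-NaN) entry into a dense int[] sized up front by cardinality().
    static int[] validIndices(double[][] matrix) {
        BitSet valid = new BitSet(matrix.length);
        for (int i = 0; i < matrix.length; i++) {
            for (double v : matrix[i]) {
                if (v != 0 && !Double.isNaN(v)) {
                    valid.set(i);
                    break;
                }
            }
        }
        int[] indices = new int[valid.cardinality()];
        int count = 0;
        for (int i = valid.nextSetBit(0); i >= 0; i = valid.nextSetBit(i + 1)) {
            indices[count++] = i;
        }
        return indices;
    }

    public static void main(String[] args) {
        double[][] m = { { 1, 0 }, { 0, 0 }, { Double.NaN, 2 } };
        System.out.println(java.util.Arrays.toString(validIndices(m))); // [0, 2]
    }
}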
From source file:com.bittorrent.mpetazzoni.client.SharedTorrent.java
/**
 * Peer disconnection handler.
 *
 * <p>
 * When a peer disconnects, we need to mark in all of the pieces it had
 * available that they can't be reached through this peer anymore.
 * </p>
 *
 * @param peer The peer we got this piece from.
 */
@Override
public synchronized void handlePeerDisconnected(SharingPeer peer) {
    BitSet availablePieces = peer.getAvailablePieces();

    for (int i = availablePieces.nextSetBit(0); i >= 0; i = availablePieces.nextSetBit(i + 1)) {
        this.rarest.remove(this.pieces[i]);
        this.pieces[i].noLongerAt(peer);
        this.rarest.add(this.pieces[i]);
    }

    Piece requested = peer.getRequestedPiece();
    if (requested != null) {
        this.requestedPieces.set(requested.getIndex(), false);
    }

    logger.debug("Peer {} went away with {} piece(s) [completed={}; available={}/{}]",
            new Object[] { peer, availablePieces.cardinality(), this.completedPieces.cardinality(),
                    this.getAvailablePieces().cardinality(), this.pieces.length });
    logger.trace("We now have {} piece(s) and {} outstanding request(s): {}",
            new Object[] { this.completedPieces.cardinality(), this.requestedPieces.cardinality(),
                    this.requestedPieces });
}
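In this example cardinality() only feeds the log lines: with one bit per torrent piece, completed and available tallies are each a single call. A tiny sketch of that bookkeeping (the class name is ours):

import java.util.BitSet;

public class PieceProgress {
    public static void main(String[] args) {
        int totalPieces = 8;
        BitSet completed = new BitSet(totalPieces);
        completed.set(0, 3); // pieces 0, 1, 2 are done
        System.out.printf("completed=%d/%d (%.0f%%)%n",
                completed.cardinality(), totalPieces,
                100.0 * completed.cardinality() / totalPieces);
    }
}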
From source file:itemsetmining.itemset.ItemsetTree.java
/**
 * Pearson's chi-squared test for itemset independence. This tests the
 * empirical itemset distribution against the independence model.
 *
 * <p>
 * N.B. the chi-squared distribution has one degree of freedom.
 *
 * @see S. Brin et al. Beyond Market Baskets: Generalizing Association Rules
 *      to Correlations
 */
private double recursiveChiSquared(final int n, final BitSet cell, final int[] sortedItems,
        final Multiset<Integer> singletons) {
    double chiSquared = 0.;
    if (n == sortedItems.length) {
        double pInd = noTransactions;
        final int[] inItems = new int[cell.cardinality()];
        final int[] outItems = new int[n - cell.cardinality()];
        int i = 0, j = 0;
        for (int k = 0; k < n; k++) {
            if (cell.get(k)) {
                inItems[i] = sortedItems[k];
                i++;
                pInd *= singletons.count(sortedItems[k]) / (double) noTransactions;
            } else {
                outItems[j] = sortedItems[k];
                j++;
                pInd *= (noTransactions - singletons.count(sortedItems[k])) / (double) noTransactions;
            }
        }
        final double pEmp = countEmpirical(inItems, outItems, root, new int[0]);
        chiSquared = ((pEmp - pInd) * (pEmp - pInd)) / pInd;
    } else {
        final BitSet celln = (BitSet) cell.clone();
        celln.set(n);
        chiSquared += recursiveChiSquared(n + 1, celln, sortedItems, singletons);
        chiSquared += recursiveChiSquared(n + 1, cell, sortedItems, singletons);
    }
    return chiSquared;
}
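The recursion enumerates every cell of the 2^n contingency table: each level either clones the BitSet with bit n set (item n "in") or recurses with it clear (item n "out"), and at the leaves cardinality() sizes the in/out partitions. A self-contained sketch of just that enumeration pattern (names are ours):

import java.util.BitSet;

public class SubsetCells {
    // Print each of the 2^total cells with its in/out partition sizes.
    static void enumerate(int n, int total, BitSet cell) {
        if (n == total) {
            System.out.println(cell + " in=" + cell.cardinality()
                    + " out=" + (total - cell.cardinality()));
            return;
        }
        BitSet withN = (BitSet) cell.clone();
        withN.set(n);
        enumerate(n + 1, total, withN); // item n inside the itemset
        enumerate(n + 1, total, cell);  // item n outside the itemset
    }

    public static void main(String[] args) {
        enumerate(0, 3, new BitSet()); // prints all 8 cells
    }
}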
From source file:org.apache.hadoop.mapreduce.lib.input.TestCombineSequenceFileInputFormat.java
@Test(timeout = 10000)
public void testFormat() throws IOException, InterruptedException {
    Job job = Job.getInstance(conf);

    Random random = new Random();
    long seed = random.nextLong();
    random.setSeed(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    final int length = 10000;
    final int numFiles = 10;

    // create files with a variety of lengths
    createFiles(length, numFiles, random, job);

    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());

    // create a combine split for the files
    InputFormat<IntWritable, BytesWritable> format =
            new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
    for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
        LOG.info("splitting: requesting = " + numSplits);
        List<InputSplit> splits = format.getSplits(job);
        LOG.info("splitting: got = " + splits.size());

        // we should have a single split as the length is comfortably smaller than
        // the block size
        assertEquals("We got more than one splits!", 1, splits.size());
        InputSplit split = splits.get(0);
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());

        // check the split
        BitSet bits = new BitSet(length);
        RecordReader<IntWritable, BytesWritable> reader = format.createRecordReader(split, context);
        MapContext<IntWritable, BytesWritable, IntWritable, BytesWritable> mcontext =
                new MapContextImpl<IntWritable, BytesWritable, IntWritable, BytesWritable>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class,
                reader.getClass());

        try {
            while (reader.nextKeyValue()) {
                IntWritable key = reader.getCurrentKey();
                BytesWritable value = reader.getCurrentValue();
                assertNotNull("Value should not be null.", value);
                final int k = key.get();
                LOG.debug("read " + k);
                assertFalse("Key in multiple partitions.", bits.get(k));
                bits.set(k);
            }
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}
From source file:org.apache.hadoop.mapreduce.lib.input.TestKeyValueTextInputFormat.java
public void testFormat() throws Exception {
    Job job = new Job(defaultConf);
    Path file = new Path(workDir, "test.txt");

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        LOG.debug("creating; entries = " + length);

        // create a file with length entries
        Writer writer = new OutputStreamWriter(localFs.create(file));
        try {
            for (int i = 0; i < length; i++) {
                writer.write(Integer.toString(i * 2));
                writer.write("\t");
                writer.write(Integer.toString(i));
                writer.write("\n");
            }
        } finally {
            writer.close();
        }

        KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        JobContext jobContext = new JobContext(job.getConfiguration(), new JobID());
        List<InputSplit> splits = format.getSplits(jobContext);
        LOG.debug("splitting: got = " + splits.size());

        TaskAttemptContext context = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID());

        // check each split
        BitSet bits = new BitSet(length);
        for (InputSplit split : splits) {
            LOG.debug("split= " + split);
            RecordReader<Text, Text> reader = format.createRecordReader(split, context);
            Class readerClass = reader.getClass();
            assertEquals("reader class is KeyValueLineRecordReader.", KeyValueLineRecordReader.class,
                    readerClass);
            reader.initialize(split, context);

            try {
                int count = 0;
                while (reader.nextKeyValue()) {
                    int v = Integer.parseInt(reader.getCurrentValue().toString());
                    LOG.debug("read " + v);
                    if (bits.get(v)) {
                        LOG.warn("conflict with " + v + " in split " + split + " at " + reader.getProgress());
                    }
                    assertFalse("Key in multiple partitions.", bits.get(v));
                    bits.set(v);
                    count++;
                }
                LOG.debug("split=" + split + " count=" + count);
            } finally {
                reader.close();
            }
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}
From source file:ArrayUtils.java
/**
 * Removes all contents of <code>array2</code> from <code>array1</code>. All
 * instances of <code>array2</code> will also be removed from
 * <code>array1</code>.
 *
 * @param <T>
 *            The type of the array
 * @param array1
 *            The array to remove elements from
 * @param array2
 *            The array containing the elements to remove; or the element to
 *            remove itself
 * @return <code>array1</code> missing all the contents of
 *         <code>array2</code>
 */
public static <T> T[] removeAll(T[] array1, Object array2) {
    if (array1 == null || array2 == null)
        return array1;
    if (!array1.getClass().isArray())
        return null;
    if (!array2.getClass().isArray())
        array2 = new Object[] { array2 };
    java.util.BitSet remove = new java.util.BitSet();
    int len1 = array1.length;
    int len2 = Array.getLength(array2);
    int i, j;
    for (i = 0; i < len1; i++) {
        for (j = 0; j < len2; j++) {
            if (equals(array1[i], Array.get(array2, j))) {
                remove.set(i);
                break;
            }
        }
    }
    T[] ret = (T[]) Array.newInstance(array1.getClass().getComponentType(), len1 - remove.cardinality());
    // This copying section might be replaced by a more efficient version
    // using System.arraycopy()--this would be much faster than reflection,
    // especially for large arrays needing only a few elements removed
    for (i = 0, j = 0; i < len1; i++) {
        if (!remove.get(i)) {
            ret[j] = array1[i];
            j++;
        }
    }
    return ret;
}
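Here cardinality() lets the result array be allocated at its exact final size before the compaction pass. The same idiom shrunk to an int[] sketch, without the reflection (names are ours):

import java.util.BitSet;

public class CompactByBitSet {
    // Mark doomed indices in a BitSet, size the result with cardinality(),
    // then compact the surviving elements into it.
    static int[] removeMarked(int[] input, BitSet remove) {
        int[] result = new int[input.length - remove.cardinality()];
        for (int i = 0, j = 0; i < input.length; i++) {
            if (!remove.get(i)) {
                result[j++] = input[i];
            }
        }
        return result;
    }

    public static void main(String[] args) {
        BitSet remove = new BitSet();
        remove.set(1);
        remove.set(3);
        int[] out = removeMarked(new int[] { 10, 20, 30, 40, 50 }, remove);
        System.out.println(java.util.Arrays.toString(out)); // [10, 30, 50]
    }
}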
From source file:org.apache.hadoop.mapred.TestSequenceFileAsTextInputFormat.java
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    //LOG.info("seed = "+seed);
    Random random = new Random(seed);

    fs.delete(dir, true);

    FileInputFormat.setInputPaths(job, dir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {
        //LOG.info("creating; entries = " + length);

        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                LongWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                LongWritable value = new LongWritable(10 * i);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }

        // try splitting the file in a variety of sizes
        InputFormat<Text, Text> format = new SequenceFileAsTextInputFormat();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got = " + splits.length);

            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<Text, Text> reader = format.getRecordReader(splits[j], job, reporter);
                Class readerClass = reader.getClass();
                assertEquals("reader class is SequenceFileAsTextRecordReader.",
                        SequenceFileAsTextRecordReader.class, readerClass);

                Text value = reader.createValue();
                Text key = reader.createKey();
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        // if (bits.get(key.get())) {
                        //   LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
                        //   LOG.info("@"+reader.getPos());
                        // }
                        int keyInt = Integer.parseInt(key.toString());
                        assertFalse("Key in multiple partitions.", bits.get(keyInt));
                        bits.set(keyInt);
                        count++;
                    }
                    //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
From source file:ArrayUtils.java
/**
 * Removes all contents of <code>array2</code> from <code>array1</code>. All
 * instances of <code>array2</code> will also be removed from
 * <code>array1</code>. For primitive types.
 *
 * @param array1
 *            The array to remove elements from
 * @param array2
 *            The array containing the elements to remove; or the element to
 *            remove itself
 * @return <code>array1</code> missing all the contents of
 *         <code>array2</code>
 */
public static Object removeAllP(Object array1, Object array2) {
    if (array1 == null || array2 == null)
        return array1;
    if (!array1.getClass().isArray())
        return null;
    if (!array2.getClass().isArray())
        array2 = new Object[] { array2 };
    else
        array2 = addP(array2, array2);
    java.util.BitSet remove = new java.util.BitSet();
    int len1 = Array.getLength(array1);
    int len2 = Array.getLength(array2);
    int i, j;
    for (i = 0; i < len1; i++) {
        for (j = 0; j < len2; j++) {
            if (equals(Array.get(array1, i), Array.get(array2, j))) {
                remove.set(i);
                break;
            }
        }
    }
    Object ret = Array.newInstance(array1.getClass().getComponentType(), len1 - remove.cardinality());
    // This copying section might be replaced by a more efficient version
    // using System.arraycopy()--this would be much faster than reflection,
    // especially for large arrays needing only a few elements removed
    for (i = 0, j = 0; i < len1; i++) {
        if (!remove.get(i)) {
            put(ret, Array.get(array1, i), j);
            j++;
        }
    }
    return ret;
}