List of usage examples for java.util.BitSet cardinality
public int cardinality()
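cardinality() returns the number of bits set to true in the BitSet. A minimal, self-contained sketch before the longer real-world examples below:

import java.util.BitSet;

public class CardinalityExample {
    public static void main(String[] args) {
        BitSet bits = new BitSet();
        bits.set(3);
        bits.set(7);
        bits.set(3); // setting an already-set bit does not change the count

        System.out.println(bits.cardinality()); // 2
        bits.clear(7);
        System.out.println(bits.cardinality()); // 1
    }
}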
From source file:srebrinb.compress.sevenzip.SevenZFile.java
private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException {
    final long numFiles = readUint64(header);
    final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int) numFiles];
    for (int i = 0; i < files.length; i++) {
        files[i] = new SevenZArchiveEntry();
    }
    BitSet isEmptyStream = null;
    BitSet isEmptyFile = null;
    BitSet isAnti = null;
    while (true) {
        final int propertyType = getUnsignedByte(header);
        if (propertyType == 0) {
            break;
        }
        final long size = readUint64(header);
        switch (propertyType) {
            case NID.kEmptyStream: {
                isEmptyStream = readBits(header, files.length);
                break;
            }
            case NID.kEmptyFile: {
                if (isEmptyStream == null) { // protect against NPE
                    throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile");
                }
                isEmptyFile = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kAnti: {
                if (isEmptyStream == null) { // protect against NPE
                    throw new IOException("Header format error: kEmptyStream must appear before kAnti");
                }
                isAnti = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kName: {
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                if (((size - 1) & 1) != 0) {
                    throw new IOException("File names length invalid");
                }
                final byte[] names = new byte[(int) (size - 1)];
                header.get(names);
                int nextFile = 0;
                int nextName = 0;
                for (int i = 0; i < names.length; i += 2) {
                    if (names[i] == 0 && names[i + 1] == 0) {
                        files[nextFile++].setName(new String(names, nextName, i - nextName, CharsetNames.UTF_16LE));
                        nextName = i + 2;
                    }
                }
                if (nextName != names.length || nextFile != files.length) {
                    throw new IOException("Error parsing file names");
                }
                break;
            }
            case NID.kCTime: {
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Unimplemented");
                }
                for (int i = 0; i < files.length; i++) {
                    files[i].setHasCreationDate(timesDefined.get(i));
                    if (files[i].getHasCreationDate()) {
                        files[i].setCreationDate(header.getLong());
                    }
                }
                break;
            }
            case NID.kATime: {
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Unimplemented");
                }
                for (int i = 0; i < files.length; i++) {
                    files[i].setHasAccessDate(timesDefined.get(i));
                    if (files[i].getHasAccessDate()) {
                        files[i].setAccessDate(header.getLong());
                    }
                }
                break;
            }
            case NID.kMTime: {
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Unimplemented");
                }
                for (int i = 0; i < files.length; i++) {
                    files[i].setHasLastModifiedDate(timesDefined.get(i));
                    if (files[i].getHasLastModifiedDate()) {
                        files[i].setLastModifiedDate(header.getLong());
                    }
                }
                break;
            }
            case NID.kWinAttributes: {
                final BitSet attributesDefined = readAllOrBits(header, files.length);
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Unimplemented");
                }
                for (int i = 0; i < files.length; i++) {
                    files[i].setHasWindowsAttributes(attributesDefined.get(i));
                    if (files[i].getHasWindowsAttributes()) {
                        files[i].setWindowsAttributes(header.getInt());
                    }
                }
                break;
            }
            case NID.kStartPos: {
                throw new IOException("kStartPos is unsupported, please report");
            }
            case NID.kDummy: {
                // 7z 9.20 asserts the content is all zeros and ignores the property
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287)
                if (skipBytesFully(header, size) < size) {
                    throw new IOException("Incomplete kDummy property");
                }
                break;
            }
            default: {
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287)
                if (skipBytesFully(header, size) < size) {
                    throw new IOException("Incomplete property of type " + propertyType);
                }
                break;
            }
        }
    }
    int nonEmptyFileCounter = 0;
    int emptyFileCounter = 0;
    for (int i = 0; i < files.length; i++) {
        files[i].setHasStream(isEmptyStream == null ? true : !isEmptyStream.get(i));
        if (files[i].hasStream()) {
            files[i].setDirectory(false);
            files[i].setAntiItem(false);
            files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter));
            files[i].setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]);
            files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]);
            ++nonEmptyFileCounter;
        } else {
            files[i].setDirectory(isEmptyFile == null ? true : !isEmptyFile.get(emptyFileCounter));
            files[i].setAntiItem(isAnti == null ? false : isAnti.get(emptyFileCounter));
            files[i].setHasCrc(false);
            files[i].setSize(0);
            ++emptyFileCounter;
        }
    }
    archive.files = files;
    calculateStreamMap(archive);
}
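In the 7z header layout above, cardinality() fixes the length of the follow-up kEmptyFile and kAnti vectors: those properties carry one bit per entry that has no stream, so the number of set bits in isEmptyStream is exactly the number of bits to read next. A minimal illustrative sketch of that sizing rule (the entry indices below are made up, not taken from a real archive):

import java.util.BitSet;

public class EmptyStreamSizing {
    public static void main(String[] args) {
        int numFiles = 8;
        BitSet isEmptyStream = new BitSet(numFiles);
        // Suppose entries 1, 4 and 5 carry no stream (directories or empty files).
        isEmptyStream.set(1);
        isEmptyStream.set(4);
        isEmptyStream.set(5);

        // kEmptyFile / kAnti hold one bit per stream-less entry, so the vector
        // length is the cardinality of isEmptyStream, not the total file count.
        int bitsToRead = isEmptyStream.cardinality();
        System.out.println("bits in kEmptyFile vector: " + bitsToRead); // 3
    }
}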
From source file:org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java
private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWritternCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWritternCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }

    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = null;
    Path spillFilePath = null;
    try {
        outputFilePath = taskOutput.getOutputFile();
    } catch (DiskErrorException e) {
        if (numRecordsWritten > 0) {
            fail();
        } else {
            // Record checking not required.
            return;
        }
    }
    try {
        spillFilePath = taskOutput.getOutputIndexFile();
    } catch (DiskErrorException e) {
        if (numRecordsWritten > 0) {
            fail();
        } else {
            // Record checking not required.
            return;
        }
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}
From source file:bobs.is.compress.sevenzip.SevenZFile.java
private void readFilesInfo(final DataInput header, final Archive archive) throws IOException {
    final long numFiles = readUint64(header);
    final SevenZArchiveEntry[] files = new SevenZArchiveEntry[(int) numFiles];
    for (int i = 0; i < files.length; i++) {
        files[i] = new SevenZArchiveEntry();
    }
    BitSet isEmptyStream = null;
    BitSet isEmptyFile = null;
    BitSet isAnti = null;
    while (true) {
        final int propertyType = header.readUnsignedByte();
        if (propertyType == 0) {
            break;
        }
        final long size = readUint64(header);
        switch (propertyType) {
            case NID.kEmptyStream: {
                isEmptyStream = readBits(header, files.length);
                break;
            }
            case NID.kEmptyFile: {
                if (isEmptyStream == null) { // protect against NPE
                    throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile");
                }
                isEmptyFile = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kAnti: {
                if (isEmptyStream == null) { // protect against NPE
                    throw new IOException("Header format error: kEmptyStream must appear before kAnti");
                }
                isAnti = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kName: {
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                if (((size - 1) & 1) != 0) {
                    throw new IOException("File names length invalid");
                }
                final byte[] names = new byte[(int) (size - 1)];
                header.readFully(names);
                int nextFile = 0;
                int nextName = 0;
                for (int i = 0; i < names.length; i += 2) {
                    if (names[i] == 0 && names[i + 1] == 0) {
                        files[nextFile].setName(new String(names, nextName, i - nextName, CharsetNames.UTF_16LE));
                        nextName = i + 2;
                        this.mapFilename.put(files[nextFile].getName(), nextFile);
                        nextFile++;
                    }
                }
                if (nextName != names.length || nextFile != files.length) {
                    throw new IOException("Error parsing file names");
                }
                break;
            }
            case NID.kCTime: {
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                }
                for (int i = 0; i < files.length; i++) {
                    files[i].setHasCreationDate(timesDefined.get(i));
                    if (files[i].getHasCreationDate()) {
                        files[i].setCreationDate(Long.reverseBytes(header.readLong()));
                    }
                }
                break;
            }
            case NID.kATime: {
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                }
                for (int i = 0; i < files.length; i++) {
                    files[i].setHasAccessDate(timesDefined.get(i));
                    if (files[i].getHasAccessDate()) {
                        files[i].setAccessDate(Long.reverseBytes(header.readLong()));
                    }
                }
                break;
            }
            case NID.kMTime: {
                final BitSet timesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                }
                for (int i = 0; i < files.length; i++) {
                    files[i].setHasLastModifiedDate(timesDefined.get(i));
                    if (files[i].getHasLastModifiedDate()) {
                        files[i].setLastModifiedDate(Long.reverseBytes(header.readLong()));
                    }
                }
                break;
            }
            case NID.kWinAttributes: {
                final BitSet attributesDefined = readAllOrBits(header, files.length);
                final int external = header.readUnsignedByte();
                if (external != 0) {
                    throw new IOException("Unimplemented");
                }
                for (int i = 0; i < files.length; i++) {
                    files[i].setHasWindowsAttributes(attributesDefined.get(i));
                    if (files[i].getHasWindowsAttributes()) {
                        files[i].setWindowsAttributes(Integer.reverseBytes(header.readInt()));
                    }
                }
                break;
            }
            case NID.kStartPos: {
                throw new IOException("kStartPos is unsupported, please report");
            }
            case NID.kDummy: {
                // 7z 9.20 asserts the content is all zeros and ignores the property
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287)
                if (skipBytesFully(header, size) < size) {
                    throw new IOException("Incomplete kDummy property");
                }
                break;
            }
            default: {
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287)
                if (skipBytesFully(header, size) < size) {
                    throw new IOException("Incomplete property of type " + propertyType);
                }
                break;
            }
        }
    }
    int nonEmptyFileCounter = 0;
    int emptyFileCounter = 0;
    for (int i = 0; i < files.length; i++) {
        files[i].setHasStream(isEmptyStream == null ? true : !isEmptyStream.get(i));
        if (files[i].hasStream()) {
            files[i].setDirectory(false);
            files[i].setAntiItem(false);
            files[i].setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter));
            files[i].setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]);
            files[i].setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]);
            ++nonEmptyFileCounter;
        } else {
            files[i].setDirectory(isEmptyFile == null ? true : !isEmptyFile.get(emptyFileCounter));
            files[i].setAntiItem(isAnti == null ? false : isAnti.get(emptyFileCounter));
            files[i].setHasCrc(false);
            files[i].setSize(0);
            ++emptyFileCounter;
        }
    }
    archive.files = files;
    calculateStreamMap(archive);
}
From source file:org.apache.hadoop.mapred.TestKeyValueTextInputFormat.java
public void testFormat() throws Exception {
    JobConf job = new JobConf();
    Path file = new Path(workDir, "test.txt");

    // A reporter that does nothing
    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {

        LOG.debug("creating; entries = " + length);

        // create a file with length entries
        Writer writer = new OutputStreamWriter(localFs.create(file));
        try {
            for (int i = 0; i < length; i++) {
                writer.write(Integer.toString(i * 2));
                writer.write("\t");
                writer.write(Integer.toString(i));
                writer.write("\n");
            }
        } finally {
            writer.close();
        }

        // try splitting the file in a variety of sizes
        KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        format.configure(job);
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / 20) + 1;
            LOG.debug("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            LOG.debug("splitting: got = " + splits.length);

            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                LOG.debug("split[" + j + "]= " + splits[j]);
                RecordReader<Text, Text> reader = format.getRecordReader(splits[j], job, reporter);
                Class readerClass = reader.getClass();
                assertEquals("reader class is KeyValueLineRecordReader.", KeyValueLineRecordReader.class,
                        readerClass);

                Text key = reader.createKey();
                Class keyClass = key.getClass();
                Text value = reader.createValue();
                Class valueClass = value.getClass();
                assertEquals("Key class is Text.", Text.class, keyClass);
                assertEquals("Value class is Text.", Text.class, valueClass);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        int v = Integer.parseInt(value.toString());
                        LOG.debug("read " + v);
                        if (bits.get(v)) {
                            LOG.warn("conflict with " + v + " in split " + j + " at position " + reader.getPos());
                        }
                        assertFalse("Key in multiple partitions.", bits.get(v));
                        bits.set(v);
                        count++;
                    }
                    LOG.debug("splits[" + j + "]=" + splits[j] + " count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
From source file:org.apache.mele.embedded.HadoopQueueEmbedded.java
private boolean ackCheck() throws IOException {
    LOG.info("Starting ack check");
    BitSet bitSet = new BitSet();
    FileSystem fileSystem = null;
    try {
        _ackLock.lock();
        _ackOutputStream.close();
        fileSystem = newFileSystem(_file);
        FileStatus fileStatus = fileSystem.getFileStatus(_file);
        long dataLength = fileStatus.getLen();
        long totalAckLength = getTotalAckLength(fileSystem);
        if (!couldContainAllAcks(totalAckLength)) {
            LOG.info("Existing early [" + totalAckLength + "] because [" + totalAckLength % 12 + "]");
            return false;
        }
        for (Path ackFile : _ackFiles) {
            LOG.info("Starting ack check for file [" + ackFile + "]");
            DFSInputStream inputStream = null;
            try {
                inputStream = getDFS(fileSystem.open(ackFile));
                long length = inputStream.getFileLength();
                DataInputStream dataInputStream = new DataInputStream(inputStream);
                while (length > 0) {
                    int pos = (int) dataInputStream.readLong();
                    // @TODO check position
                    // 4 bytes for storing the length of the message
                    int len = dataInputStream.readInt() + 4;
                    bitSet.set(pos, pos + len);
                    length -= 12;
                }
                if (bitSet.cardinality() == dataLength) {
                    return true;
                }
            } finally {
                if (inputStream != null) {
                    inputStream.close();
                }
            }
        }
        return false;
    } finally {
        reopenAckFile(fileSystem);
        _ackLock.unlock();
        if (fileSystem != null) {
            fileSystem.close();
        }
    }
}
From source file:org.roaringbitmap.TestRoaringBitmap.java
public static boolean equals(BitSet bs, RoaringBitmap rr) {
    final int[] a = new int[bs.cardinality()];
    int pos = 0;
    for (int x = bs.nextSetBit(0); x >= 0; x = bs.nextSetBit(x + 1)) {
        a[pos++] = x;
    }
    return Arrays.equals(rr.toArray(), a);
}
From source file:info.extensiblecatalog.OAIToolkit.api.Importer.java
/**
 * Executed when the Lucene statistics report is invoked from the command line.
 *
 * @throws ParseException
 */
private void statsexecute() throws ParseException {
    LuceneSearcher ls = new LuceneSearcher(configuration.getLuceneIndex());
    Sort sort = null;
    QueryParser parser = new QueryParser(Version.LUCENE_30, "id", new StandardAnalyzer(Version.LUCENE_30));

    BitSet hits_deleted = ls.searchForBits(parser.parse("is_deleted:true"), sort);
    BitSet hits_notdeleted = ls.searchForBits(parser.parse("is_deleted:false"), sort);

    BitSet hits_bib_recordtype = ls.searchForBits(parser.parse("record_type:1 AND is_deleted:false"), sort);
    BitSet hits_bib_recordtype_deleted = ls.searchForBits(parser.parse("record_type:1 AND is_deleted:true"), sort);
    BitSet hits_auth_recordtype = ls.searchForBits(parser.parse("record_type:2 AND is_deleted:false"), sort);
    BitSet hits_auth_recordtype_deleted = ls.searchForBits(parser.parse("record_type:2 AND is_deleted:true"), sort);
    BitSet hits_hold_recordtype = ls.searchForBits(parser.parse("record_type:3 AND is_deleted:false"), sort);
    BitSet hits_hold_recordtype_deleted = ls.searchForBits(parser.parse("record_type:3 AND is_deleted:true"), sort);
    BitSet hits_class_recordtype = ls.searchForBits(parser.parse("record_type:4 AND is_deleted:false"), sort);
    BitSet hits_class_recordtype_deleted = ls.searchForBits(parser.parse("record_type:4 AND is_deleted:true"), sort);
    BitSet hits_comm_recordtype = ls.searchForBits(parser.parse("record_type:5 AND is_deleted:false"), sort);
    BitSet hits_comm_recordtype_deleted = ls.searchForBits(parser.parse("record_type:5 AND is_deleted:true"), sort);

    int deleted_count = hits_deleted.cardinality();
    int notdeleted_count = hits_notdeleted.cardinality();
    int bib_count = hits_bib_recordtype.cardinality();
    int bib_count_deleted = hits_bib_recordtype_deleted.cardinality();
    int auth_count = hits_auth_recordtype.cardinality();
    int auth_count_deleted = hits_auth_recordtype_deleted.cardinality();
    int hold_count = hits_hold_recordtype.cardinality();
    int hold_count_deleted = hits_hold_recordtype_deleted.cardinality();
    int class_count = hits_class_recordtype.cardinality();
    int class_count_deleted = hits_class_recordtype_deleted.cardinality();
    int comm_count = hits_comm_recordtype.cardinality();
    int comm_count_deleted = hits_comm_recordtype_deleted.cardinality();

    lucenestatslog.info(" *************** Lucene Database Statistics *************** \n\n ");
    lucenestatslog.info("Total records in the Lucene Database are: " + (deleted_count + notdeleted_count));
    lucenestatslog.info(" Bibliographic records: " + bib_count);
    lucenestatslog.info("\t Deleted Bibliographic records: " + bib_count_deleted);
    lucenestatslog.info(" Authority records: " + auth_count);
    lucenestatslog.info("\t Deleted Authority records: " + auth_count_deleted);
    lucenestatslog.info(" Holdings records: " + hold_count);
    lucenestatslog.info("\t Deleted Holdings records: " + hold_count_deleted);
    lucenestatslog.info(" Classification records: " + class_count);
    lucenestatslog.info("\t Deleted Classification records: " + class_count_deleted);
    lucenestatslog.info(" Community information records: " + comm_count);
    lucenestatslog.info("\t Deleted Community information records: " + comm_count_deleted);
    lucenestatslog.info(" Deleted records: " + deleted_count);
}
From source file:org.apache.hadoop.mapreduce.lib.input.TestMRKeyValueTextInputFormat.java
@Test
public void testSplitableCodecs() throws Exception {
    final Job job = Job.getInstance(defaultConf);
    final Configuration conf = job.getConfiguration();

    // Create the codec
    CompressionCodec codec = null;
    try {
        codec = (CompressionCodec) ReflectionUtils
                .newInstance(conf.getClassByName("org.apache.hadoop.io.compress.BZip2Codec"), conf);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException("Illegal codec!");
    }
    Path file = new Path(workDir, "test" + codec.getDefaultExtension());

    int seed = new Random().nextInt();
    LOG.info("seed = " + seed);
    Random random = new Random(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    final int MAX_LENGTH = 500000;
    FileInputFormat.setMaxInputSplitSize(job, MAX_LENGTH / 20);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 4) + 1) {

        LOG.info("creating; entries = " + length);

        // create a file with length entries
        Writer writer = new OutputStreamWriter(codec.createOutputStream(localFs.create(file)));
        try {
            for (int i = 0; i < length; i++) {
                writer.write(Integer.toString(i * 2));
                writer.write("\t");
                writer.write(Integer.toString(i));
                writer.write("\n");
            }
        } finally {
            writer.close();
        }

        // try splitting the file in a variety of sizes
        KeyValueTextInputFormat format = new KeyValueTextInputFormat();
        assertTrue("KVTIF claims not splittable", format.isSplitable(job, file));
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / 2000) + 1;
            LOG.info("splitting: requesting = " + numSplits);
            List<InputSplit> splits = format.getSplits(job);
            LOG.info("splitting: got = " + splits.size());

            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.size(); j++) {
                LOG.debug("split[" + j + "]= " + splits.get(j));
                TaskAttemptContext context = MapReduceTestUtil
                        .createDummyMapTaskAttemptContext(job.getConfiguration());
                RecordReader<Text, Text> reader = format.createRecordReader(splits.get(j), context);
                Class<?> clazz = reader.getClass();
                MapContext<Text, Text, Text, Text> mcontext = new MapContextImpl<Text, Text, Text, Text>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), splits.get(j));
                reader.initialize(splits.get(j), mcontext);

                Text key = null;
                Text value = null;
                try {
                    int count = 0;
                    while (reader.nextKeyValue()) {
                        key = reader.getCurrentKey();
                        value = reader.getCurrentValue();
                        final int k = Integer.parseInt(key.toString());
                        final int v = Integer.parseInt(value.toString());
                        assertEquals("Bad key", 0, k % 2);
                        assertEquals("Mismatched key/value", k / 2, v);
                        LOG.debug("read " + k + "," + v);
                        assertFalse(k + "," + v + " in multiple partitions.", bits.get(v));
                        bits.set(v);
                        count++;
                    }
                    if (count > 0) {
                        LOG.info("splits[" + j + "]=" + splits.get(j) + " count=" + count);
                    } else {
                        LOG.debug("splits[" + j + "]=" + splits.get(j) + " count=" + count);
                    }
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
From source file:org.apache.flink.streaming.connectors.kafka.KafkaConsumerTestBase.java
/**
 * Runs a job using the provided environment to read a sequence of records from a single Kafka topic.
 * The method allows to individually specify the expected starting offset and total read value count of each partition.
 * The job will be considered successful only if all partition read results match the start offset and value count criteria.
 */
protected void readSequence(final StreamExecutionEnvironment env, final StartupMode startupMode,
        final Map<KafkaTopicPartition, Long> specificStartupOffsets, final Properties cc, final String topicName,
        final Map<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset) throws Exception {
    final int sourceParallelism = partitionsToValuesCountAndStartOffset.keySet().size();

    int finalCountTmp = 0;
    for (Map.Entry<Integer, Tuple2<Integer, Integer>> valuesCountAndStartOffset : partitionsToValuesCountAndStartOffset
            .entrySet()) {
        finalCountTmp += valuesCountAndStartOffset.getValue().f0;
    }
    final int finalCount = finalCountTmp;

    final TypeInformation<Tuple2<Integer, Integer>> intIntTupleType = TypeInfoParser
            .parse("Tuple2<Integer, Integer>");

    final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> deser = new TypeInformationSerializationSchema<>(
            intIntTupleType, env.getConfig());

    // create the consumer
    cc.putAll(secureProps);
    FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deser, cc);
    switch (startupMode) {
        case EARLIEST:
            consumer.setStartFromEarliest();
            break;
        case LATEST:
            consumer.setStartFromLatest();
            break;
        case SPECIFIC_OFFSETS:
            consumer.setStartFromSpecificOffsets(specificStartupOffsets);
            break;
        case GROUP_OFFSETS:
            consumer.setStartFromGroupOffsets();
            break;
    }

    DataStream<Tuple2<Integer, Integer>> source = env.addSource(consumer).setParallelism(sourceParallelism)
            .map(new ThrottledMapper<Tuple2<Integer, Integer>>(20)).setParallelism(sourceParallelism);

    // verify data
    source.flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {

        private HashMap<Integer, BitSet> partitionsToValueCheck;
        private int count = 0;

        @Override
        public void open(Configuration parameters) throws Exception {
            partitionsToValueCheck = new HashMap<>();
            for (Integer partition : partitionsToValuesCountAndStartOffset.keySet()) {
                partitionsToValueCheck.put(partition, new BitSet());
            }
        }

        @Override
        public void flatMap(Tuple2<Integer, Integer> value, Collector<Integer> out) throws Exception {
            int partition = value.f0;
            int val = value.f1;

            BitSet bitSet = partitionsToValueCheck.get(partition);
            if (bitSet == null) {
                throw new RuntimeException("Got a record from an unknown partition");
            } else {
                bitSet.set(val - partitionsToValuesCountAndStartOffset.get(partition).f1);
            }

            count++;

            LOG.info("Received message {}, total {} messages", value, count);

            // verify if we've seen everything
            if (count == finalCount) {
                for (Map.Entry<Integer, BitSet> partitionsToValueCheck : this.partitionsToValueCheck.entrySet()) {
                    BitSet check = partitionsToValueCheck.getValue();
                    int expectedValueCount = partitionsToValuesCountAndStartOffset
                            .get(partitionsToValueCheck.getKey()).f0;

                    if (check.cardinality() != expectedValueCount) {
                        throw new RuntimeException("Expected cardinality to be " + expectedValueCount
                                + ", but was " + check.cardinality());
                    } else if (check.nextClearBit(0) != expectedValueCount) {
                        throw new RuntimeException("Expected next clear bit to be " + expectedValueCount
                                + ", but was " + check.cardinality());
                    }
                }

                // test has passed
                throw new SuccessException();
            }
        }
    }).setParallelism(1);

    tryExecute(env, "Read data from Kafka");

    LOG.info("Successfully read sequence for verification");
}
From source file:com.turn.ttorrent.client.TorrentHandler.java
/**
 * Build this torrent's pieces array.
 *
 * <p>
 * Hash and verify any potentially present local data and create this
 * torrent's pieces array from their respective hash provided in the
 * torrent meta-info.
 * </p>
 *
 * <p>
 * This function should be called soon after the constructor to initialize
 * the pieces array.
 * </p>
 */
@VisibleForTesting
/* pp */ void init() throws InterruptedException, IOException {
    {
        State s = getState();
        if (s != State.WAITING) {
            LOG.info("Restarting torrent from state " + s);
            return;
        }
    }
    setState(State.VALIDATING);

    try {
        int npieces = torrent.getPieceCount();

        long size = getSize();
        // Store in a local so we can update with minimal synchronization.
        BitSet completedPieces = new BitSet(npieces);
        long completedSize = 0;

        ThreadPoolExecutor executor = client.getEnvironment().getExecutorService();
        // TorrentCreator.newExecutor("TorrentHandlerInit");
        try {
            LOG.info("{}: Analyzing local data for {} ({} pieces)...",
                    new Object[] { getLocalPeerName(), getName(), npieces });

            int step = 10;
            CountDownLatch latch = new CountDownLatch(npieces);
            for (int index = 0; index < npieces; index++) {
                // TODO: Read the file sequentially and pass it to the validator.
                // Otherwise we thrash the disk on validation.
                ByteBuffer buffer = ByteBuffer.allocate(getPieceLength(index));
                bucket.read(buffer, getPieceOffset(index));
                buffer.flip();

                executor.execute(new PieceValidator(torrent, index, buffer, completedPieces, latch));

                if (index / (float) npieces * 100f > step) {
                    LOG.info("{}: ... {}% complete", getLocalPeerName(), step);
                    step += 10;
                }
            }
            latch.await();

            for (int i = completedPieces.nextSetBit(0); i >= 0; i = completedPieces.nextSetBit(i + 1)) {
                completedSize += getPieceLength(i);
            }
        } finally {
            // Request orderly executor shutdown and wait for hashing tasks to
            // complete.
            // executor.shutdown();
            // executor.awaitTermination(1, TimeUnit.SECONDS);
        }

        LOG.debug("{}: {}: we have {}/{} bytes ({}%) [{}/{} pieces].",
                new Object[] { getLocalPeerName(), getName(), completedSize, size,
                        String.format("%.1f", (100f * (completedSize / (float) size))),
                        completedPieces.cardinality(), getPieceCount() });

        synchronized (lock) {
            this.completedPieces = completedPieces;
        }

        if (isComplete())
            setState(State.SEEDING);
        else
            setState(State.SHARING);
    } catch (Exception e) {
        setState(State.ERROR);
        Throwables.propagateIfPossible(e, InterruptedException.class, IOException.class);
        throw Throwables.propagate(e);
    }
}