List of usage examples for java.util.BitSet.cardinality()
public int cardinality()
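cardinality() returns the number of bits currently set to true in the BitSet. Before the longer real-world examples below, here is a minimal, self-contained sketch of the call in isolation; the class name and the chosen bit indices are illustrative only.

import java.util.BitSet;

public class CardinalityDemo {
    public static void main(String[] args) {
        BitSet bits = new BitSet();
        bits.set(1);
        bits.set(4);
        bits.set(64); // the BitSet grows automatically as higher indices are set

        // cardinality() counts the bits currently set to true
        System.out.println(bits.cardinality()); // 3

        bits.clear(4);
        System.out.println(bits.cardinality()); // 2
    }
}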
From source file:org.apache.carbondata.hadoop.api.CarbonTableInputFormat.java
/**
 * {@inheritDoc}
 * Configurations FileInputFormat.INPUT_DIR
 * are used to get table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    if (null == carbonTable) {
        throw new IOException("Missing/Corrupt schema file for table.");
    }
    this.readCommittedScope = getReadCommitted(job, identifier);
    LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();

    SegmentUpdateStatusManager updateStatusManager =
            new SegmentUpdateStatusManager(carbonTable, loadMetadataDetails);
    List<Segment> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    List<Segment> streamSegments = null;
    // get all valid segments and set them into the configuration
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo segments =
            segmentStatusManager.getValidAndInvalidSegments(loadMetadataDetails, this.readCommittedScope);

    // to check whether only streaming segments access is enabled or not,
    // if access streaming segment is true then data will be read from streaming segments
    boolean accessStreamingSegments = getAccessStreamingSegments(job.getConfiguration());
    if (getValidateSegmentsToAccess(job.getConfiguration())) {
        if (!accessStreamingSegments) {
            List<Segment> validSegments = segments.getValidSegments();
            streamSegments = segments.getStreamSegments();
            streamSegments = getFilteredSegment(job, streamSegments, true, readCommittedScope);
            if (validSegments.size() == 0) {
                return getSplitsOfStreaming(job, identifier, streamSegments);
            }
            List<Segment> filteredSegmentToAccess =
                    getFilteredSegment(job, segments.getValidSegments(), true, readCommittedScope);
            if (filteredSegmentToAccess.size() == 0) {
                return getSplitsOfStreaming(job, identifier, streamSegments);
            } else {
                setSegmentsToAccess(job.getConfiguration(), filteredSegmentToAccess);
            }
        } else {
            List<Segment> filteredNormalSegments = getFilteredNormalSegments(job,
                    segments.getValidSegments(), getSegmentsToAccess(job, readCommittedScope));
            streamSegments = segments.getStreamSegments();
            if (filteredNormalSegments.size() == 0) {
                return getSplitsOfStreaming(job, identifier, streamSegments);
            }
            setSegmentsToAccess(job.getConfiguration(), filteredNormalSegments);
        }
        // remove entry in the segment index if there are invalid segments
        invalidSegments.addAll(segments.getInvalidSegments());
        for (Segment invalidSegmentId : invalidSegments) {
            invalidTimestampsList
                    .add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId.getSegmentNo()));
        }
        if (invalidSegments.size() > 0) {
            DataMapStoreManager.getInstance()
                    .clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()), invalidSegments);
        }
    }
    List<Segment> validAndInProgressSegments = new ArrayList<>(segments.getValidSegments());
    // Add in progress segments also to filter it as in case of aggregate table load it loads
    // data from in progress table.
    validAndInProgressSegments.addAll(segments.getListOfInProgressSegments());
    // get updated filtered list
    List<Segment> filteredSegmentToAccess =
            getFilteredSegment(job, new ArrayList<>(validAndInProgressSegments), false, readCommittedScope);
    // Clean the updated segments from memory if the update happens on segments
    List<Segment> toBeCleanedSegments = new ArrayList<>();
    for (Segment filteredSegment : filteredSegmentToAccess) {
        boolean refreshNeeded = DataMapStoreManager.getInstance().getTableSegmentRefresher(carbonTable)
                .isRefreshNeeded(filteredSegment,
                        updateStatusManager.getInvalidTimestampRange(filteredSegment.getSegmentNo()));
        if (refreshNeeded) {
            toBeCleanedSegments.add(filteredSegment);
        }
    }
    // Clean segments if refresh is needed
    for (Segment segment : filteredSegmentToAccess) {
        if (DataMapStoreManager.getInstance().getTableSegmentRefresher(carbonTable)
                .isRefreshNeeded(segment.getSegmentNo())) {
            toBeCleanedSegments.add(segment);
        }
    }
    if (toBeCleanedSegments.size() > 0) {
        DataMapStoreManager.getInstance()
                .clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()), toBeCleanedSegments);
    }
    // process and resolve the expression
    Expression filter = getFilterPredicates(job.getConfiguration());
    // this will be null in case of corrupt schema file.
    PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
    carbonTable.processFilterExpression(filter, null, null);
    // prune partitions for filter query on partition table
    BitSet matchedPartitions = null;
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
        matchedPartitions = setMatchedPartitions(null, filter, partitionInfo, null);
        if (matchedPartitions != null) {
            if (matchedPartitions.cardinality() == 0) {
                return new ArrayList<InputSplit>();
            } else if (matchedPartitions.cardinality() == partitionInfo.getNumPartitions()) {
                matchedPartitions = null;
            }
        }
    }
    FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter);
    // do block filtering and get split
    List<InputSplit> splits = getSplits(job, filterInterface, filteredSegmentToAccess, matchedPartitions,
            partitionInfo, null, updateStatusManager);
    // pass the invalid segment to task side in order to remove index entry in task side
    if (invalidSegments.size() > 0) {
        for (InputSplit split : splits) {
            ((org.apache.carbondata.hadoop.CarbonInputSplit) split).setInvalidSegments(invalidSegments);
            ((org.apache.carbondata.hadoop.CarbonInputSplit) split)
                    .setInvalidTimestampRange(invalidTimestampsList);
        }
    }
    // add all splits of streaming
    List<InputSplit> splitsOfStreaming = getSplitsOfStreaming(job, identifier, streamSegments);
    if (!splitsOfStreaming.isEmpty()) {
        splits.addAll(splitsOfStreaming);
    }
    return splits;
}
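The partition-pruning branch above reduces to three cases on the matched-partition BitSet: no bit set means no splits at all, every bit set means no pruning is needed, and anything in between means keeping only the matched partitions. A reduced, hypothetical sketch of that decision follows; the PartitionPruningSketch class, its prunePartitions helper, and the String partition ids are not part of CarbonData.

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

// Hypothetical helper distilling the cardinality() checks used in getSplits() above.
public class PartitionPruningSketch {
    static List<String> prunePartitions(BitSet matched, List<String> allPartitions) {
        if (matched.cardinality() == 0) {
            return new ArrayList<>();        // filter matches nothing: no splits at all
        }
        if (matched.cardinality() == allPartitions.size()) {
            return allPartitions;            // filter matches everything: no pruning needed
        }
        List<String> kept = new ArrayList<>();
        for (int i = matched.nextSetBit(0); i >= 0; i = matched.nextSetBit(i + 1)) {
            kept.add(allPartitions.get(i));  // keep only the partitions whose bit is set
        }
        return kept;
    }
}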
From source file:com.bittorrent.mpetazzoni.client.SharedTorrent.java
/**
 * Peer ready handler.
 *
 * <p>
 * When a peer becomes ready to accept piece block requests, select a piece
 * to download and go for it.
 * </p>
 *
 * @param peer The peer that became ready.
 */
@Override
public synchronized void handlePeerReady(SharingPeer peer) {
    BitSet interesting = peer.getAvailablePieces();
    interesting.andNot(this.completedPieces);
    interesting.andNot(this.requestedPieces);

    logger.trace("Peer {} is ready and has {} interesting piece(s).", peer, interesting.cardinality());

    // If we didn't find interesting pieces, we need to check if we're in
    // an end-game situation. If yes, we request an already requested piece
    // to try to speed up the end.
    if (interesting.cardinality() == 0) {
        interesting = peer.getAvailablePieces();
        interesting.andNot(this.completedPieces);
        if (interesting.cardinality() == 0) {
            logger.trace("No interesting piece from {}!", peer);
            return;
        }

        if (this.completedPieces.cardinality() < ENG_GAME_COMPLETION_RATIO * this.pieces.length) {
            logger.trace("Not far along enough to warrant end-game mode.");
            return;
        }

        logger.trace("Possible end-game, we're about to request a piece "
                + "that was already requested from another peer.");
    }

    // Extract the RAREST_PIECE_JITTER rarest pieces from the interesting
    // pieces of this peer.
    ArrayList<Piece> choice = new ArrayList<Piece>(RAREST_PIECE_JITTER);
    synchronized (this.rarest) {
        for (Piece piece : this.rarest) {
            if (interesting.get(piece.getIndex())) {
                choice.add(piece);
                if (choice.size() >= RAREST_PIECE_JITTER) {
                    break;
                }
            }
        }
    }

    Piece chosen = choice.get(this.random.nextInt(Math.min(choice.size(), RAREST_PIECE_JITTER)));
    this.requestedPieces.set(chosen.getIndex());

    logger.trace("Requesting {} from {}, we now have {} " + "outstanding request(s): {}",
            new Object[] { chosen, peer, this.requestedPieces.cardinality(), this.requestedPieces });

    peer.downloadPiece(chosen);
}
From source file:com.turn.ttorrent.client.SharedTorrent.java
/**
 * Peer ready handler.
 *
 * <p>
 * When a peer becomes ready to accept piece block requests, select a piece
 * to download and go for it.
 * </p>
 *
 * @param peer The peer that became ready.
 */
@Override
public synchronized void handlePeerReady(SharingPeer peer) {
    BitSet interesting = peer.getAvailablePieces();
    interesting.andNot(this.completedPieces);
    interesting.andNot(this.requestedPieces);

    logger.trace("Peer {} is ready and has {} interesting piece(s).", peer, interesting.cardinality());

    // If we didn't find interesting pieces, we need to check if we're in
    // an end-game situation. If yes, we request an already requested piece
    // to try to speed up the end.
    if (interesting.cardinality() == 0) {
        interesting = peer.getAvailablePieces();
        interesting.andNot(this.completedPieces);
        if (interesting.cardinality() == 0) {
            logger.trace("No interesting piece from {}!", peer);
            return;
        }

        if (this.completedPieces.cardinality() < ENG_GAME_COMPLETION_RATIO * this.pieces.length) {
            logger.trace("Not far along enough to warrant end-game mode.");
            return;
        }

        logger.trace("Possible end-game, we're about to request a piece "
                + "that was already requested from another peer.");
    }

    Piece chosen = requestStrategy.choosePiece(rarest, interesting, pieces);
    this.requestedPieces.set(chosen.getIndex());

    logger.trace("Requesting {} from {}, we now have {} " + "outstanding request(s): {}",
            new Object[] { chosen, peer, this.requestedPieces.cardinality(), this.requestedPieces });

    peer.downloadPiece(chosen);
}
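Both SharedTorrent examples above lean on the same idiom: copy the peer's availability BitSet, mask it with andNot(), then branch on cardinality(). A condensed, standalone sketch of that idiom follows; the piece indices and counts are invented for illustration and are not taken from ttorrent.

import java.util.BitSet;

public class InterestingPiecesSketch {
    public static void main(String[] args) {
        BitSet available = new BitSet();
        available.set(0, 8);                // the peer advertises pieces 0..7

        BitSet completed = new BitSet();
        completed.set(0);
        completed.set(3);                   // we already have pieces 0 and 3

        BitSet interesting = (BitSet) available.clone();
        interesting.andNot(completed);      // drop pieces we already have

        if (interesting.cardinality() == 0) {
            System.out.println("Nothing to request from this peer.");
        } else {
            System.out.println(interesting.cardinality() + " candidate piece(s): " + interesting);
        }
    }
}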
From source file:org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput.java
protected List<Event> generateEventsOnClose() throws IOException {
    DataMovementEventPayloadProto.Builder payloadBuilder = DataMovementEventPayloadProto.newBuilder();

    boolean outputGenerated = true;
    if (sendEmptyPartitionDetails) {
        Path indexFile = sorter.getMapOutput().getOutputIndexFile();
        TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
        BitSet emptyPartitionDetails = new BitSet();
        int emptyPartitions = 0;
        for (int i = 0; i < spillRecord.size(); i++) {
            TezIndexRecord indexRecord = spillRecord.getIndex(i);
            if (!indexRecord.hasData()) {
                emptyPartitionDetails.set(i);
                emptyPartitions++;
            }
        }
        outputGenerated = (spillRecord.size() != emptyPartitions);
        if (emptyPartitions > 0) {
            ByteString emptyPartitionsBytesString = TezCommonUtils
                    .compressByteArrayToByteString(TezUtilsInternal.toByteArray(emptyPartitionDetails));
            payloadBuilder.setEmptyPartitions(emptyPartitionsBytesString);
            LOG.info("EmptyPartition bitsetSize=" + emptyPartitionDetails.cardinality() + ", numOutputs="
                    + getNumPhysicalOutputs() + ", emptyPartitions=" + emptyPartitions + ", compressedSize="
                    + emptyPartitionsBytesString.size());
        }
    }

    if (!sendEmptyPartitionDetails || outputGenerated) {
        String host = System.getenv(ApplicationConstants.Environment.NM_HOST.toString());
        ByteBuffer shuffleMetadata = getContext()
                .getServiceProviderMetaData(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID);
        int shufflePort = ShuffleUtils.deserializeShuffleProviderMetaData(shuffleMetadata);
        payloadBuilder.setHost(host);
        payloadBuilder.setPort(shufflePort);
        payloadBuilder.setPathComponent(getContext().getUniqueIdentifier());
    }

    payloadBuilder.setRunDuration((int) ((endTime - startTime) / 1000));
    DataMovementEventPayloadProto payloadProto = payloadBuilder.build();
    ByteBuffer payload = payloadProto.toByteString().asReadOnlyByteBuffer();

    long outputSize = getContext().getCounters().findCounter(TaskCounter.OUTPUT_BYTES).getValue();
    VertexManagerEventPayloadProto.Builder vmBuilder = VertexManagerEventPayloadProto.newBuilder();
    vmBuilder.setOutputSize(outputSize);
    VertexManagerEvent vmEvent = VertexManagerEvent.create(getContext().getDestinationVertexName(),
            vmBuilder.build().toByteString().asReadOnlyByteBuffer());

    List<Event> events = Lists.newArrayListWithCapacity(getNumPhysicalOutputs() + 1);
    events.add(vmEvent);

    CompositeDataMovementEvent csdme = CompositeDataMovementEvent.create(0, getNumPhysicalOutputs(), payload);
    events.add(csdme);

    return events;
}
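The Tez code serializes the empty-partition BitSet with Tez's own helpers before shipping it in the event payload. Outside Tez, the JDK's BitSet.toByteArray() and BitSet.valueOf() give a comparable round trip, with cardinality() as a cheap sanity check; the sketch below is under that assumption and does not use any Tez API.

import java.util.BitSet;

public class EmptyPartitionEncodingSketch {
    public static void main(String[] args) {
        BitSet emptyPartitions = new BitSet();
        emptyPartitions.set(2);
        emptyPartitions.set(5);                      // pretend partitions 2 and 5 produced no data

        byte[] wire = emptyPartitions.toByteArray(); // compact byte encoding of the set bits
        BitSet decoded = BitSet.valueOf(wire);       // reverse of toByteArray()

        // cardinality() should survive the round trip unchanged
        System.out.println(emptyPartitions.cardinality() + " == " + decoded.cardinality());
    }
}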
From source file:org.apache.hadoop.mapred.TestCombineTextInputFormat.java
@Test(timeout = 10000)
public void testFormat() throws Exception {
    JobConf job = new JobConf(defaultConf);

    Random random = new Random();
    long seed = random.nextLong();
    LOG.info("seed = " + seed);
    random.setSeed(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    final int length = 10000;
    final int numFiles = 10;

    createFiles(length, numFiles, random);

    // create a combined split for the files
    CombineTextInputFormat format = new CombineTextInputFormat();
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(length / 20) + 1;
        LOG.info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.getSplits(job, numSplits);
        LOG.info("splitting: got = " + splits.length);

        // we should have a single split as the length is comfortably smaller than
        // the block size
        assertEquals("We got more than one splits!", 1, splits.length);
        InputSplit split = splits[0];
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());

        // check the split
        BitSet bits = new BitSet(length);
        LOG.debug("split= " + split);
        RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, voidReporter);
        try {
            int count = 0;
            while (reader.next(key, value)) {
                int v = Integer.parseInt(value.toString());
                LOG.debug("read " + v);
                if (bits.get(v)) {
                    LOG.warn("conflict with " + v + " at position " + reader.getPos());
                }
                assertFalse("Key in multiple partitions.", bits.get(v));
                bits.set(v);
                count++;
            }
            LOG.info("splits=" + split + " count=" + count);
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}
From source file:net.sf.extjwnl.princeton.file.PrincetonRandomAccessDictionaryFile.java
private String renderSynset(Synset synset) {
    int estLength = offsetLength + 1 // offset
            + 2 + 1 // lexfilenum
            + 1 // ss_type
            + offsetLength + 1 // w_cnt
            + (10 + 3 + 1) * synset.getWords().size() // avg word 10 chars + lex_id max 3 chars
            + offsetLength + 1 // p_cnt
            + (1 + 1 + offsetLength + 1 + 1 + 1 + 4 + 1) * synset.getPointers().size()
            + synset.getGloss().length() + 2 + 2;
    if (POS.VERB == synset.getPOS()) {
        estLength = estLength + 8 * synset.getWords().size(); // 8 for verb flag, about one per word
    }

    // synset_offset lex_filenum ss_type w_cnt word lex_id [word lex_id...] p_cnt [ptr...] [frames...] | gloss
    // w_cnt Two digit hexadecimal integer indicating the number of words in the synset.
    String posKey = synset.getPOS().getKey();
    if (POS.ADJECTIVE == synset.getPOS() && synset.isAdjectiveCluster()) {
        posKey = POS.ADJECTIVE_SATELLITE_KEY;
    }
    if (checkLexFileNumber && log.isWarnEnabled()
            && !LexFileIdFileNameMap.getMap().containsKey(synset.getLexFileNum())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_001", synset.getLexFileNum()));
    }
    if (checkWordCountLimit && log.isWarnEnabled() && (0xFF < synset.getWords().size())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_004",
                new Object[] { synset.getOffset(), synset.getWords().size() }));
    }
    StringBuilder result = new StringBuilder(estLength);
    formatOffset(synset.getOffset(), offsetLength, result);
    if (synset.getLexFileNum() < 10) {
        result.append(" 0").append(synset.getLexFileNum());
    } else {
        result.append(" ").append(synset.getLexFileNum());
    }
    result.append(" ").append(posKey);
    if (synset.getWords().size() < 0x10) {
        result.append(" 0").append(Integer.toHexString(synset.getWords().size())).append(" ");
    } else {
        result.append(" ").append(Integer.toHexString(synset.getWords().size())).append(" ");
    }
    for (Word w : synset.getWords()) {
        // ASCII form of a word as entered in the synset by the lexicographer, with spaces replaced
        // by underscore characters (_). The text of the word is case sensitive.
        // lex_id One digit hexadecimal integer that, when appended onto lemma, uniquely identifies
        // a sense within a lexicographer file.
        String lemma = w.getLemma().replace(' ', '_');
        if (w instanceof Adjective) {
            Adjective a = (Adjective) w;
            if (AdjectivePosition.NONE != a.getAdjectivePosition()) {
                lemma = lemma + "(" + a.getAdjectivePosition().getKey() + ")";
            }
        }
        if (checkLexIdLimit && log.isWarnEnabled() && (0xF < w.getLexId())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_005",
                    new Object[] { synset.getOffset(), w.getLemma(), w.getLexId() }));
        }
        result.append(lemma).append(" ");
        result.append(Long.toHexString(w.getLexId())).append(" ");
    }
    // Three digit decimal integer indicating the number of pointers from this synset to other synsets.
    // If p_cnt is 000 the synset has no pointers.
    if (checkRelationLimit && log.isWarnEnabled() && (999 < synset.getPointers().size())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_002",
                new Object[] { synset.getOffset(), synset.getPointers().size() }));
    }
    if (synset.getPointers().size() < 100) {
        result.append("0");
        if (synset.getPointers().size() < 10) {
            result.append("0");
        }
    }
    result.append(synset.getPointers().size()).append(" ");
    for (Pointer p : synset.getPointers()) {
        // pointer_symbol synset_offset pos source/target
        result.append(p.getType().getKey()).append(" ");
        // synset_offset is the byte offset of the target synset in the data file corresponding to pos
        formatOffset(p.getTargetOffset(), offsetLength, result);
        result.append(" ");
        // pos
        result.append(p.getTargetPOS().getKey()).append(" ");
        // source/target
        // The source/target field distinguishes lexical and semantic pointers.
        // It is a four byte field, containing two two-digit hexadecimal integers.
        // The first two digits indicates the word number in the current (source) synset,
        // the last two digits indicate the word number in the target synset.
        // A value of 0000 means that pointer_symbol represents a semantic relation between the
        // current (source) synset and the target synset indicated by synset_offset.
        // A lexical relation between two words in different synsets is represented by non-zero
        // values in the source and target word numbers.
        // The first and last two bytes of this field indicate the word numbers in the source and
        // target synsets, respectively, between which the relation holds.
        // Word numbers are assigned to the word fields in a synset, from left to right, beginning with 1.
        if (checkPointerIndexLimit && log.isWarnEnabled() && (0xFF < p.getSourceIndex())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_006",
                    new Object[] { synset.getOffset(), p.getSource().getSynset().getOffset(), p.getSourceIndex() }));
        }
        if (checkPointerIndexLimit && log.isWarnEnabled() && (0xFF < p.getTargetIndex())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_006",
                    new Object[] { synset.getOffset(), p.getTarget().getSynset().getOffset(), p.getTargetIndex() }));
        }
        if (p.getSourceIndex() < 0x10) {
            result.append("0");
        }
        result.append(Integer.toHexString(p.getSourceIndex()));
        if (p.getTargetIndex() < 0x10) {
            result.append("0");
        }
        result.append(Integer.toHexString(p.getTargetIndex())).append(" ");
    }
    // frames In data.verb only
    if (POS.VERB == synset.getPOS()) {
        BitSet verbFrames = synset.getVerbFrameFlags();
        int verbFramesCount = verbFrames.cardinality();
        for (Word word : synset.getWords()) {
            if (word instanceof Verb) {
                BitSet bits = ((Verb) word).getVerbFrameFlags();
                for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                    // WN TRICK - there are duplicates in data
                    // 02593551 41 v 04 lord_it_over 0 queen_it_over 0 put_on_airs 0 act_superior 0 001 @ 02367363 v 0000
                    //   09 + 02 00 + 02 04 + 22 04 + 02 03 + 22 03 + 08 02 + 09 02 + 08 01 + 09 01
                    //   | act like the master of; "He is lording it over the students"
                    // + 02 04 and + 02 03 duplicate + 02 00
                    // it is the only one, but it causes offsets to differ on WN30 rewrite
                    if (!verbFrames.get(i)) {
                        verbFramesCount++;
                    }
                }
            }
        }
        if (checkVerbFrameLimit && log.isWarnEnabled() && (99 < verbFramesCount)) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_007",
                    new Object[] { synset.getOffset(), verbFramesCount }));
        }
        if (verbFramesCount < 10) {
            result.append("0");
        }
        result.append(Integer.toString(verbFramesCount)).append(" ");
        for (int i = verbFrames.nextSetBit(0); i >= 0; i = verbFrames.nextSetBit(i + 1)) {
            if (checkVerbFrameLimit && log.isWarnEnabled() && (99 < i)) {
                log.warn(JWNL.resolveMessage("PRINCETON_WARN_008",
                        new Object[] { synset.getOffset(), i }));
            }
            result.append("+ ");
            if (i < 10) {
                result.append("0");
            }
            result.append(Integer.toString(i));
            result.append(" 00 ");
        }
        for (Word word : synset.getWords()) {
            if (word instanceof Verb) {
                BitSet bits = ((Verb) word).getVerbFrameFlags();
                for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                    if (!verbFrames.get(i)) {
                        if (checkVerbFrameLimit && log.isWarnEnabled() && (0xFF < word.getIndex())) {
                            log.warn(JWNL.resolveMessage("PRINCETON_WARN_008",
                                    new Object[] { synset.getOffset(), word.getIndex() }));
                        }
                        result.append("+ ");
                        if (i < 10) {
                            result.append("0");
                        }
                        result.append(Integer.toString(i)).append(" ");
                        if (word.getIndex() < 0x10) {
                            result.append("0");
                        }
                        result.append(Integer.toHexString(word.getIndex())).append(" ");
                    }
                }
            }
        }
    }
    result.append("| ").append(synset.getGloss()).append(" "); // why every line in most WN files ends with two spaces?
    return result.toString();
}
From source file:org.apache.hadoop.mapred.TestSequenceFileInputFormat.java
public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    //LOG.info("seed = "+seed);
    Random random = new Random(seed);

    fs.delete(dir, true);

    FileInputFormat.setInputPaths(job, dir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {

        //LOG.info("creating; entries = " + length);

        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                BytesWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                byte[] data = new byte[random.nextInt(10)];
                random.nextBytes(data);
                BytesWritable value = new BytesWritable(data);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }

        // try splitting the file in a variety of sizes
        InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>();
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got = " + splits.length);

            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job,
                        reporter);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        // if (bits.get(key.get())) {
                        //   LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
                        //   LOG.info("@"+reader.getPos());
                        // }
                        assertFalse("Key in multiple partitions.", bits.get(key.get()));
                        bits.set(key.get());
                        count++;
                    }
                    //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }
    }
}
From source file:org.apache.hadoop.mapred.TestMultiFileInputFormat.java
public void testFormat() throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("Test started");
        LOG.info("Max split count = " + MAX_SPLIT_COUNT);
        LOG.info("Split count increment = " + SPLIT_COUNT_INCR);
        LOG.info("Max bytes per file = " + MAX_BYTES);
        LOG.info("Max number of files = " + MAX_NUM_FILES);
        LOG.info("Number of files increment = " + NUM_FILES_INCR);
    }

    MultiFileInputFormat<Text, Text> format = new DummyMultiFileInputFormat();
    FileSystem fs = FileSystem.getLocal(job);

    for (int numFiles = 1; numFiles < MAX_NUM_FILES; numFiles += (NUM_FILES_INCR / 2)
            + rand.nextInt(NUM_FILES_INCR / 2)) {
        Path dir = initFiles(fs, numFiles, -1);
        BitSet bits = new BitSet(numFiles);
        for (int i = 1; i < MAX_SPLIT_COUNT; i += rand.nextInt(SPLIT_COUNT_INCR) + 1) {
            LOG.info("Running for Num Files=" + numFiles + ", split count=" + i);

            MultiFileSplit[] splits = (MultiFileSplit[]) format.getSplits(job, i);
            bits.clear();

            for (MultiFileSplit split : splits) {
                long splitLength = 0;
                for (Path p : split.getPaths()) {
                    long length = fs.getContentSummary(p).getLength();
                    assertEquals(length, lengths.get(p.getName()).longValue());
                    splitLength += length;
                    String name = p.getName();
                    int index = Integer.parseInt(name.substring(name.lastIndexOf("file_") + 5));
                    assertFalse(bits.get(index));
                    bits.set(index);
                }
                assertEquals(splitLength, split.getLength());
            }
        }
        assertEquals(bits.cardinality(), numFiles);
        fs.delete(dir, true);
    }
    LOG.info("Test Finished");
}
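The three Hadoop InputFormat tests above all use the same verification trick: mark each key's index in a BitSet as it is read, assert the bit was not already set (no duplicates), and finally assert that cardinality() equals the expected count (no gaps). A stripped-down sketch of that pattern follows; the record stream is simulated here rather than read from a RecordReader.

import java.util.BitSet;

public class CoverageCheckSketch {
    public static void main(String[] args) {
        int expected = 100;
        BitSet seen = new BitSet(expected);

        // Simulated record stream; a real test would pull keys from a RecordReader.
        for (int key = 0; key < expected; key++) {
            if (seen.get(key)) {
                throw new AssertionError("Key in multiple partitions: " + key);
            }
            seen.set(key);
        }

        // cardinality() == expected means every key was seen at least (and thus exactly) once
        if (seen.cardinality() != expected) {
            throw new AssertionError("Some keys in no partition.");
        }
        System.out.println("All " + expected + " keys seen exactly once.");
    }
}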
From source file:org.apache.nutch.tools.PruneIndexTool.java
/**
 * For each query, find all matching documents and delete them from all input
 * indexes. Optionally, an additional check can be performed by using {@link PruneChecker}
 * implementations.
 */
public void run() {
    BitSet bits = new BitSet(reader.maxDoc());
    AllHitsCollector ahc = new AllHitsCollector(bits);
    boolean doDelete = false;
    for (int i = 0; i < queries.length; i++) {
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + "Processing query: " + queries[i].toString());
        }
        bits.clear();
        try {
            searcher.search(queries[i], ahc);
        } catch (IOException e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn(dr + " - failed: " + e.getMessage());
            }
            continue;
        }
        if (bits.cardinality() == 0) {
            if (LOG.isInfoEnabled()) {
                LOG.info(dr + " - no matching documents.");
            }
            continue;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + " - found " + bits.cardinality() + " document(s).");
        }
        // Now delete all matching documents
        int docNum = -1, start = 0, cnt = 0;
        // probably faster than looping sequentially through all index values?
        while ((docNum = bits.nextSetBit(start)) != -1) {
            // don't delete the same document multiple times
            if (reader.isDeleted(docNum))
                continue;
            try {
                if (checkers != null && checkers.length > 0) {
                    boolean check = true;
                    for (int k = 0; k < checkers.length; k++) {
                        // fail if any checker returns false
                        check &= checkers[k].isPrunable(queries[i], reader, docNum);
                    }
                    doDelete = check;
                } else
                    doDelete = true;
                if (doDelete) {
                    if (!dryrun)
                        reader.deleteDocument(docNum);
                    cnt++;
                }
            } catch (Exception e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn(dr + " - failed to delete doc #" + docNum);
                }
            }
            start = docNum + 1;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + " - deleted " + cnt + " document(s).");
        }
    }
    // close checkers
    if (checkers != null) {
        for (int i = 0; i < checkers.length; i++) {
            checkers[i].close();
        }
    }
    try {
        reader.close();
    } catch (IOException e) {
        if (LOG.isWarnEnabled()) {
            LOG.warn(dr + "Exception when closing reader(s): " + e.getMessage());
        }
    }
}
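PruneIndexTool uses cardinality() only to report how many documents matched, then walks the same BitSet with nextSetBit() to act on each hit. The sketch below isolates that iteration idiom from the Lucene specifics; the document numbers are arbitrary and the class is not part of Nutch.

import java.util.BitSet;

public class MatchedDocsSketch {
    public static void main(String[] args) {
        BitSet hits = new BitSet();
        hits.set(3);
        hits.set(17);
        hits.set(42);                       // pretend these document numbers matched a query

        System.out.println("found " + hits.cardinality() + " document(s).");

        // Standard BitSet iteration: nextSetBit() visits exactly cardinality() indices.
        for (int docNum = hits.nextSetBit(0); docNum != -1; docNum = hits.nextSetBit(docNum + 1)) {
            System.out.println("would delete doc #" + docNum);
        }
    }
}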
From source file:model.DecomposableModel.java
/**
 * Compute the difference in the entropy from this model, to one that would
 * add vertex1 and vertex2 to it
 *
 * @param a
 * @param b
 * @param computer
 * @return
 */
public int treeWidthIfAdding(Integer a, Integer b) {
    // System.out.println("computing actual entropy");
    BitSet Sab = graph.getSeparator(a, b);
    BitSet Sabuaub = (BitSet) Sab.clone();
    Sabuaub.set(a);
    Sabuaub.set(b);
    return Sabuaub.cardinality();
}