Example usage for java.util BitSet cardinality

List of usage examples for java.util BitSet cardinality

Introduction

On this page you can find example usage for java.util.BitSet.cardinality().

Prototype

public int cardinality() 

Document

Returns the number of bits set to true in this BitSet.
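
Before the full project excerpts below, here is a minimal, self-contained sketch (not taken from any of those projects; the class name CardinalityDemo is just an illustrative placeholder) showing what cardinality() reports:

import java.util.BitSet;

public class CardinalityDemo {
    public static void main(String[] args) {
        BitSet bits = new BitSet();   // all bits start out false
        bits.set(1);
        bits.set(4);
        bits.set(9);

        // cardinality() counts only the bits currently set to true
        System.out.println(bits.cardinality());   // 3

        bits.clear(4);
        System.out.println(bits.cardinality());   // 2

        // a common idiom seen in the examples below: "is anything set at all?"
        if (bits.cardinality() == 0) {
            System.out.println("no bits set");
        }
    }
}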

Usage

From source file:org.apache.carbondata.hadoop.api.CarbonTableInputFormat.java

/**
 * {@inheritDoc}
 * Configurations FileInputFormat.INPUT_DIR
 * are used to get table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());

    CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    if (null == carbonTable) {
        throw new IOException("Missing/Corrupt schema file for table.");
    }
    this.readCommittedScope = getReadCommitted(job, identifier);
    LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();

    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(carbonTable,
            loadMetadataDetails);
    List<Segment> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    List<Segment> streamSegments = null;
    // get all valid segments and set them into the configuration
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager
            .getValidAndInvalidSegments(loadMetadataDetails, this.readCommittedScope);

    // to check whether only streaming segments access is enabled or not,
    // if access streaming segment is true then data will be read from streaming segments
    boolean accessStreamingSegments = getAccessStreamingSegments(job.getConfiguration());
    if (getValidateSegmentsToAccess(job.getConfiguration())) {
        if (!accessStreamingSegments) {
            List<Segment> validSegments = segments.getValidSegments();
            streamSegments = segments.getStreamSegments();
            streamSegments = getFilteredSegment(job, streamSegments, true, readCommittedScope);
            if (validSegments.size() == 0) {
                return getSplitsOfStreaming(job, identifier, streamSegments);
            }
            List<Segment> filteredSegmentToAccess = getFilteredSegment(job, segments.getValidSegments(), true,
                    readCommittedScope);
            if (filteredSegmentToAccess.size() == 0) {
                return getSplitsOfStreaming(job, identifier, streamSegments);
            } else {
                setSegmentsToAccess(job.getConfiguration(), filteredSegmentToAccess);
            }
        } else {
            List<Segment> filteredNormalSegments = getFilteredNormalSegments(job, segments.getValidSegments(),
                    getSegmentsToAccess(job, readCommittedScope));
            streamSegments = segments.getStreamSegments();
            if (filteredNormalSegments.size() == 0) {
                return getSplitsOfStreaming(job, identifier, streamSegments);
            }
            setSegmentsToAccess(job.getConfiguration(), filteredNormalSegments);
        }
        // remove entry in the segment index if there are invalid segments
        invalidSegments.addAll(segments.getInvalidSegments());
        for (Segment invalidSegmentId : invalidSegments) {
            invalidTimestampsList
                    .add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId.getSegmentNo()));
        }
        if (invalidSegments.size() > 0) {
            DataMapStoreManager.getInstance()
                    .clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()), invalidSegments);
        }
    }
    List<Segment> validAndInProgressSegments = new ArrayList<>(segments.getValidSegments());
    // Add in progress segments also to filter it as in case of aggregate table load it loads
    // data from in progress table.
    validAndInProgressSegments.addAll(segments.getListOfInProgressSegments());
    // get updated filtered list
    List<Segment> filteredSegmentToAccess = getFilteredSegment(job, new ArrayList<>(validAndInProgressSegments),
            false, readCommittedScope);
    // Clean the updated segments from memory if the update happens on segments
    List<Segment> toBeCleanedSegments = new ArrayList<>();
    for (Segment filteredSegment : filteredSegmentToAccess) {
        boolean refreshNeeded = DataMapStoreManager.getInstance().getTableSegmentRefresher(carbonTable)
                .isRefreshNeeded(filteredSegment,
                        updateStatusManager.getInvalidTimestampRange(filteredSegment.getSegmentNo()));
        if (refreshNeeded) {
            toBeCleanedSegments.add(filteredSegment);
        }
    }
    // Clean segments if refresh is needed
    for (Segment segment : filteredSegmentToAccess) {
        if (DataMapStoreManager.getInstance().getTableSegmentRefresher(carbonTable)
                .isRefreshNeeded(segment.getSegmentNo())) {
            toBeCleanedSegments.add(segment);
        }
    }

    if (toBeCleanedSegments.size() > 0) {
        DataMapStoreManager.getInstance().clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()),
                toBeCleanedSegments);
    }

    // process and resolve the expression
    Expression filter = getFilterPredicates(job.getConfiguration());
    // this will be null in case of corrupt schema file.
    PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
    carbonTable.processFilterExpression(filter, null, null);

    // prune partitions for filter query on partition table
    BitSet matchedPartitions = null;
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
        matchedPartitions = setMatchedPartitions(null, filter, partitionInfo, null);
        if (matchedPartitions != null) {
            if (matchedPartitions.cardinality() == 0) {
                return new ArrayList<InputSplit>();
            } else if (matchedPartitions.cardinality() == partitionInfo.getNumPartitions()) {
                matchedPartitions = null;
            }
        }
    }

    FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter);

    // do block filtering and get split
    List<InputSplit> splits = getSplits(job, filterInterface, filteredSegmentToAccess, matchedPartitions,
            partitionInfo, null, updateStatusManager);
    // pass the invalid segment to task side in order to remove index entry in task side
    if (invalidSegments.size() > 0) {
        for (InputSplit split : splits) {
            ((org.apache.carbondata.hadoop.CarbonInputSplit) split).setInvalidSegments(invalidSegments);
            ((org.apache.carbondata.hadoop.CarbonInputSplit) split)
                    .setInvalidTimestampRange(invalidTimestampsList);
        }
    }

    // add all splits of streaming
    List<InputSplit> splitsOfStreaming = getSplitsOfStreaming(job, identifier, streamSegments);
    if (!splitsOfStreaming.isEmpty()) {
        splits.addAll(splitsOfStreaming);
    }
    return splits;
}

From source file:com.bittorrent.mpetazzoni.client.SharedTorrent.java

/**
 * Peer ready handler.
 *
 * <p>
 * When a peer becomes ready to accept piece block requests, select a piece
 * to download and go for it.
 * </p>
 *
 * @param peer The peer that became ready.
 */
@Override
public synchronized void handlePeerReady(SharingPeer peer) {
    BitSet interesting = peer.getAvailablePieces();
    interesting.andNot(this.completedPieces);
    interesting.andNot(this.requestedPieces);

    logger.trace("Peer {} is ready and has {} interesting piece(s).", peer, interesting.cardinality());

    // If we didn't find interesting pieces, we need to check if we're in
    // an end-game situation. If yes, we request an already requested piece
    // to try to speed up the end.
    if (interesting.cardinality() == 0) {
        interesting = peer.getAvailablePieces();
        interesting.andNot(this.completedPieces);
        if (interesting.cardinality() == 0) {
            logger.trace("No interesting piece from {}!", peer);
            return;
        }

        if (this.completedPieces.cardinality() < ENG_GAME_COMPLETION_RATIO * this.pieces.length) {
            logger.trace("Not far along enough to warrant end-game mode.");
            return;
        }

        logger.trace("Possible end-game, we're about to request a piece "
                + "that was already requested from another peer.");
    }

    // Extract the RAREST_PIECE_JITTER rarest pieces from the interesting
    // pieces of this peer.
    ArrayList<Piece> choice = new ArrayList<Piece>(RAREST_PIECE_JITTER);
    synchronized (this.rarest) {
        for (Piece piece : this.rarest) {
            if (interesting.get(piece.getIndex())) {
                choice.add(piece);
                if (choice.size() >= RAREST_PIECE_JITTER) {
                    break;
                }
            }
        }
    }

    Piece chosen = choice.get(this.random.nextInt(Math.min(choice.size(), RAREST_PIECE_JITTER)));
    this.requestedPieces.set(chosen.getIndex());

    logger.trace("Requesting {} from {}, we now have {} " + "outstanding request(s): {}",
            new Object[] { chosen, peer, this.requestedPieces.cardinality(), this.requestedPieces });

    peer.downloadPiece(chosen);
}

From source file:com.turn.ttorrent.client.SharedTorrent.java

/**
 * Peer ready handler.
 *
 * <p>
 * When a peer becomes ready to accept piece block requests, select a piece
 * to download and go for it.
 * </p>
 *
 * @param peer The peer that became ready.
 */
@Override
public synchronized void handlePeerReady(SharingPeer peer) {
    BitSet interesting = peer.getAvailablePieces();
    interesting.andNot(this.completedPieces);
    interesting.andNot(this.requestedPieces);

    logger.trace("Peer {} is ready and has {} interesting piece(s).", peer, interesting.cardinality());

    // If we didn't find interesting pieces, we need to check if we're in
    // an end-game situation. If yes, we request an already requested piece
    // to try to speed up the end.
    if (interesting.cardinality() == 0) {
        interesting = peer.getAvailablePieces();
        interesting.andNot(this.completedPieces);
        if (interesting.cardinality() == 0) {
            logger.trace("No interesting piece from {}!", peer);
            return;
        }

        if (this.completedPieces.cardinality() < ENG_GAME_COMPLETION_RATIO * this.pieces.length) {
            logger.trace("Not far along enough to warrant end-game mode.");
            return;
        }

        logger.trace("Possible end-game, we're about to request a piece "
                + "that was already requested from another peer.");
    }

    Piece chosen = requestStrategy.choosePiece(rarest, interesting, pieces);
    this.requestedPieces.set(chosen.getIndex());

    logger.trace("Requesting {} from {}, we now have {} " + "outstanding request(s): {}",
            new Object[] { chosen, peer, this.requestedPieces.cardinality(), this.requestedPieces });

    peer.downloadPiece(chosen);
}

From source file:org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput.java

protected List<Event> generateEventsOnClose() throws IOException {
    DataMovementEventPayloadProto.Builder payloadBuilder = DataMovementEventPayloadProto.newBuilder();

    boolean outputGenerated = true;
    if (sendEmptyPartitionDetails) {
        Path indexFile = sorter.getMapOutput().getOutputIndexFile();
        TezSpillRecord spillRecord = new TezSpillRecord(indexFile, conf);
        BitSet emptyPartitionDetails = new BitSet();
        int emptyPartitions = 0;
        for (int i = 0; i < spillRecord.size(); i++) {
            TezIndexRecord indexRecord = spillRecord.getIndex(i);
            if (!indexRecord.hasData()) {
                emptyPartitionDetails.set(i);
                emptyPartitions++;
            }
        }
        outputGenerated = (spillRecord.size() != emptyPartitions);
        if (emptyPartitions > 0) {
            ByteString emptyPartitionsBytesString = TezCommonUtils
                    .compressByteArrayToByteString(TezUtilsInternal.toByteArray(emptyPartitionDetails));
            payloadBuilder.setEmptyPartitions(emptyPartitionsBytesString);
            LOG.info("EmptyPartition bitsetSize=" + emptyPartitionDetails.cardinality() + ", numOutputs="
                    + getNumPhysicalOutputs() + ", emptyPartitions=" + emptyPartitions + ", compressedSize="
                    + emptyPartitionsBytesString.size());
        }
    }
    if (!sendEmptyPartitionDetails || outputGenerated) {
        String host = System.getenv(ApplicationConstants.Environment.NM_HOST.toString());
        ByteBuffer shuffleMetadata = getContext()
                .getServiceProviderMetaData(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID);
        int shufflePort = ShuffleUtils.deserializeShuffleProviderMetaData(shuffleMetadata);
        payloadBuilder.setHost(host);
        payloadBuilder.setPort(shufflePort);
        payloadBuilder.setPathComponent(getContext().getUniqueIdentifier());
    }

    payloadBuilder.setRunDuration((int) ((endTime - startTime) / 1000));
    DataMovementEventPayloadProto payloadProto = payloadBuilder.build();
    ByteBuffer payload = payloadProto.toByteString().asReadOnlyByteBuffer();

    long outputSize = getContext().getCounters().findCounter(TaskCounter.OUTPUT_BYTES).getValue();
    VertexManagerEventPayloadProto.Builder vmBuilder = VertexManagerEventPayloadProto.newBuilder();
    vmBuilder.setOutputSize(outputSize);
    VertexManagerEvent vmEvent = VertexManagerEvent.create(getContext().getDestinationVertexName(),
            vmBuilder.build().toByteString().asReadOnlyByteBuffer());

    List<Event> events = Lists.newArrayListWithCapacity(getNumPhysicalOutputs() + 1);
    events.add(vmEvent);

    CompositeDataMovementEvent csdme = CompositeDataMovementEvent.create(0, getNumPhysicalOutputs(), payload);
    events.add(csdme);

    return events;
}

From source file:org.apache.hadoop.mapred.TestCombineTextInputFormat.java

@Test(timeout = 10000)
public void testFormat() throws Exception {
    JobConf job = new JobConf(defaultConf);

    Random random = new Random();
    long seed = random.nextLong();
    LOG.info("seed = " + seed);
    random.setSeed(seed);

    localFs.delete(workDir, true);
    FileInputFormat.setInputPaths(job, workDir);

    final int length = 10000;
    final int numFiles = 10;

    createFiles(length, numFiles, random);

    // create a combined split for the files
    CombineTextInputFormat format = new CombineTextInputFormat();
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 0; i < 3; i++) {
        int numSplits = random.nextInt(length / 20) + 1;
        LOG.info("splitting: requesting = " + numSplits);
        InputSplit[] splits = format.getSplits(job, numSplits);
        LOG.info("splitting: got =        " + splits.length);

        // we should have a single split as the length is comfortably smaller than
        // the block size
        assertEquals("We got more than one splits!", 1, splits.length);
        InputSplit split = splits[0];
        assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());

        // check the split
        BitSet bits = new BitSet(length);
        LOG.debug("split= " + split);
        RecordReader<LongWritable, Text> reader = format.getRecordReader(split, job, voidReporter);
        try {
            int count = 0;
            while (reader.next(key, value)) {
                int v = Integer.parseInt(value.toString());
                LOG.debug("read " + v);
                if (bits.get(v)) {
                    LOG.warn("conflict with " + v + " at position " + reader.getPos());
                }
                assertFalse("Key in multiple partitions.", bits.get(v));
                bits.set(v);
                count++;
            }
            LOG.info("splits=" + split + " count=" + count);
        } finally {
            reader.close();
        }
        assertEquals("Some keys in no partition.", length, bits.cardinality());
    }
}

From source file:net.sf.extjwnl.princeton.file.PrincetonRandomAccessDictionaryFile.java

private String renderSynset(Synset synset) {
    int estLength = offsetLength + 1//offset
            + 2 + 1 //lexfilenum
            + 1//ss_type
            + offsetLength + 1//w_cnt
            + (10 + 3 + 1) * synset.getWords().size()//avg word 10 chars + lex_id max 3 chars
            + offsetLength + 1//p_cnt
            + (1 + 1 + offsetLength + 1 + 1 + 1 + 4 + 1) * synset.getPointers().size()
            + synset.getGloss().length() + 2 + 2;
    if (POS.VERB == synset.getPOS()) {
        estLength = estLength + 8 * synset.getWords().size();//8 for verb flag, about one per word
    }

    //synset_offset  lex_filenum  ss_type  w_cnt  word  lex_id  [word  lex_id...]  p_cnt  [ptr...]  [frames...]  |   gloss
    //w_cnt Two digit hexadecimal integer indicating the number of words in the synset.
    String posKey = synset.getPOS().getKey();
    if (POS.ADJECTIVE == synset.getPOS() && synset.isAdjectiveCluster()) {
        posKey = POS.ADJECTIVE_SATELLITE_KEY;
    }
    if (checkLexFileNumber && log.isWarnEnabled()
            && !LexFileIdFileNameMap.getMap().containsKey(synset.getLexFileNum())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_001", synset.getLexFileNum()));
    }
    if (checkWordCountLimit && log.isWarnEnabled() && (0xFF < synset.getWords().size())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_004",
                new Object[] { synset.getOffset(), synset.getWords().size() }));
    }
    StringBuilder result = new StringBuilder(estLength);
    formatOffset(synset.getOffset(), offsetLength, result);
    if (synset.getLexFileNum() < 10) {
        result.append(" 0").append(synset.getLexFileNum());
    } else {
        result.append(" ").append(synset.getLexFileNum());
    }
    result.append(" ").append(posKey);
    if (synset.getWords().size() < 0x10) {
        result.append(" 0").append(Integer.toHexString(synset.getWords().size())).append(" ");
    } else {
        result.append(" ").append(Integer.toHexString(synset.getWords().size())).append(" ");
    }
    for (Word w : synset.getWords()) {
        //ASCII form of a word as entered in the synset by the lexicographer, with spaces replaced by underscore characters (_ ). The text of the word is case sensitive.
        //lex_id One digit hexadecimal integer that, when appended onto lemma , uniquely identifies a sense within a lexicographer file.
        String lemma = w.getLemma().replace(' ', '_');
        if (w instanceof Adjective) {
            Adjective a = (Adjective) w;
            if (AdjectivePosition.NONE != a.getAdjectivePosition()) {
                lemma = lemma + "(" + a.getAdjectivePosition().getKey() + ")";
            }
        }
        if (checkLexIdLimit && log.isWarnEnabled() && (0xF < w.getLexId())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_005",
                    new Object[] { synset.getOffset(), w.getLemma(), w.getLexId() }));
        }
        result.append(lemma).append(" ");
        result.append(Long.toHexString(w.getLexId())).append(" ");
    }
    //Three digit decimal integer indicating the number of pointers from this synset to other synsets. If p_cnt is 000 the synset has no pointers.
    if (checkRelationLimit && log.isWarnEnabled() && (999 < synset.getPointers().size())) {
        log.warn(JWNL.resolveMessage("PRINCETON_WARN_002",
                new Object[] { synset.getOffset(), synset.getPointers().size() }));
    }
    if (synset.getPointers().size() < 100) {
        result.append("0");
        if (synset.getPointers().size() < 10) {
            result.append("0");
        }
    }
    result.append(synset.getPointers().size()).append(" ");
    for (Pointer p : synset.getPointers()) {
        //pointer_symbol  synset_offset  pos  source/target
        result.append(p.getType().getKey()).append(" ");
        //synset_offset is the byte offset of the target synset in the data file corresponding to pos
        formatOffset(p.getTargetOffset(), offsetLength, result);
        result.append(" ");
        //pos
        result.append(p.getTargetPOS().getKey()).append(" ");
        //source/target
        //The source/target field distinguishes lexical and semantic pointers.
        // It is a four byte field, containing two two-digit hexadecimal integers.
        // The first two digits indicates the word number in the current (source) synset,
        // the last two digits indicate the word number in the target synset.
        // A value of 0000 means that pointer_symbol represents a semantic relation between the current (source) synset and the target synset indicated by synset_offset .

        //A lexical relation between two words in different synsets is represented by non-zero values in the source and target word numbers.
        // The first and last two bytes of this field indicate the word numbers in the source and target synsets, respectively, between which the relation holds.
        // Word numbers are assigned to the word fields in a synset, from left to right, beginning with 1 .
        if (checkPointerIndexLimit && log.isWarnEnabled() && (0xFF < p.getSourceIndex())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_006", new Object[] { synset.getOffset(),
                    p.getSource().getSynset().getOffset(), p.getSourceIndex() }));
        }
        if (checkPointerIndexLimit && log.isWarnEnabled() && (0xFF < p.getTargetIndex())) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_006", new Object[] { synset.getOffset(),
                    p.getTarget().getSynset().getOffset(), p.getTargetIndex() }));
        }
        if (p.getSourceIndex() < 0x10) {
            result.append("0");
        }
        result.append(Integer.toHexString(p.getSourceIndex()));
        if (p.getTargetIndex() < 0x10) {
            result.append("0");
        }
        result.append(Integer.toHexString(p.getTargetIndex())).append(" ");
    }

    //frames In data.verb only
    if (POS.VERB == synset.getPOS()) {
        BitSet verbFrames = synset.getVerbFrameFlags();
        int verbFramesCount = verbFrames.cardinality();
        for (Word word : synset.getWords()) {
            if (word instanceof Verb) {
                BitSet bits = ((Verb) word).getVerbFrameFlags();
                for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                    //WN TRICK - there are duplicates in data
                    //02593551 41 v 04 lord_it_over 0 queen_it_over 0 put_on_airs 0 act_superior 0 001 @ 02367363 v 0000
                    // 09 + 02 00 + 02 04 + 22 04 + 02 03 + 22 03 + 08 02 + 09 02 + 08 01 + 09 01 | act like the master of; "He is lording it over the students"
                    // + 02 04 and + 02 03 duplicate + 02 00
                    // it is the only one, but it causes offsets to differ on WN30 rewrite
                    if (!verbFrames.get(i)) {
                        verbFramesCount++;
                    }
                }
            }
        }
        if (checkVerbFrameLimit && log.isWarnEnabled() && (99 < verbFramesCount)) {
            log.warn(JWNL.resolveMessage("PRINCETON_WARN_007",
                    new Object[] { synset.getOffset(), verbFramesCount }));
        }
        if (verbFramesCount < 10) {
            result.append("0");
        }
        result.append(Integer.toString(verbFramesCount)).append(" ");
        for (int i = verbFrames.nextSetBit(0); i >= 0; i = verbFrames.nextSetBit(i + 1)) {
            if (checkVerbFrameLimit && log.isWarnEnabled() && (99 < i)) {
                log.warn(JWNL.resolveMessage("PRINCETON_WARN_008", new Object[] { synset.getOffset(), i }));
            }
            result.append("+ ");
            if (i < 10) {
                result.append("0");
            }
            result.append(Integer.toString(i));
            result.append(" 00 ");
        }
        for (Word word : synset.getWords()) {
            if (word instanceof Verb) {
                BitSet bits = ((Verb) word).getVerbFrameFlags();
                for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
                    if (!verbFrames.get(i)) {
                        if (checkVerbFrameLimit && log.isWarnEnabled() && (0xFF < word.getIndex())) {
                            log.warn(JWNL.resolveMessage("PRINCETON_WARN_008",
                                    new Object[] { synset.getOffset(), word.getIndex() }));
                        }
                        result.append("+ ");
                        if (i < 10) {
                            result.append("0");
                        }
                        result.append(Integer.toString(i)).append(" ");
                        if (word.getIndex() < 0x10) {
                            result.append("0");
                        }
                        result.append(Integer.toHexString(word.getIndex())).append(" ");
                    }
                }
            }
        }
    }

    result.append("| ").append(synset.getGloss()).append("  ");//why every line in most WN files ends with two spaces?

    return result.toString();
}

From source file:org.apache.hadoop.mapred.TestSequenceFileInputFormat.java

public void testFormat() throws Exception {
    JobConf job = new JobConf(conf);
    FileSystem fs = FileSystem.getLocal(conf);
    Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
    Path file = new Path(dir, "test.seq");

    Reporter reporter = Reporter.NULL;

    int seed = new Random().nextInt();
    //LOG.info("seed = "+seed);
    Random random = new Random(seed);

    fs.delete(dir, true);

    FileInputFormat.setInputPaths(job, dir);

    // for a variety of lengths
    for (int length = 0; length < MAX_LENGTH; length += random.nextInt(MAX_LENGTH / 10) + 1) {

        //LOG.info("creating; entries = " + length);

        // create a file with length entries
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                BytesWritable.class);
        try {
            for (int i = 0; i < length; i++) {
                IntWritable key = new IntWritable(i);
                byte[] data = new byte[random.nextInt(10)];
                random.nextBytes(data);
                BytesWritable value = new BytesWritable(data);
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }

        // try splitting the file in a variety of sizes
        InputFormat<IntWritable, BytesWritable> format = new SequenceFileInputFormat<IntWritable, BytesWritable>();
        IntWritable key = new IntWritable();
        BytesWritable value = new BytesWritable();
        for (int i = 0; i < 3; i++) {
            int numSplits = random.nextInt(MAX_LENGTH / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
            //LOG.info("splitting: requesting = " + numSplits);
            InputSplit[] splits = format.getSplits(job, numSplits);
            //LOG.info("splitting: got =        " + splits.length);

            // check each split
            BitSet bits = new BitSet(length);
            for (int j = 0; j < splits.length; j++) {
                RecordReader<IntWritable, BytesWritable> reader = format.getRecordReader(splits[j], job,
                        reporter);
                try {
                    int count = 0;
                    while (reader.next(key, value)) {
                        // if (bits.get(key.get())) {
                        // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
                        // LOG.info("@"+reader.getPos());
                        // }
                        assertFalse("Key in multiple partitions.", bits.get(key.get()));
                        bits.set(key.get());
                        count++;
                    }
                    //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
                } finally {
                    reader.close();
                }
            }
            assertEquals("Some keys in no partition.", length, bits.cardinality());
        }

    }
}

From source file:org.apache.hadoop.mapred.TestMultiFileInputFormat.java

public void testFormat() throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("Test started");
        LOG.info("Max split count           = " + MAX_SPLIT_COUNT);
        LOG.info("Split count increment     = " + SPLIT_COUNT_INCR);
        LOG.info("Max bytes per file        = " + MAX_BYTES);
        LOG.info("Max number of files       = " + MAX_NUM_FILES);
        LOG.info("Number of files increment = " + NUM_FILES_INCR);
    }

    MultiFileInputFormat<Text, Text> format = new DummyMultiFileInputFormat();
    FileSystem fs = FileSystem.getLocal(job);

    for (int numFiles = 1; numFiles < MAX_NUM_FILES; numFiles += (NUM_FILES_INCR / 2)
            + rand.nextInt(NUM_FILES_INCR / 2)) {

        Path dir = initFiles(fs, numFiles, -1);
        BitSet bits = new BitSet(numFiles);
        for (int i = 1; i < MAX_SPLIT_COUNT; i += rand.nextInt(SPLIT_COUNT_INCR) + 1) {
            LOG.info("Running for Num Files=" + numFiles + ", split count=" + i);

            MultiFileSplit[] splits = (MultiFileSplit[]) format.getSplits(job, i);
            bits.clear();

            for (MultiFileSplit split : splits) {
                long splitLength = 0;
                for (Path p : split.getPaths()) {
                    long length = fs.getContentSummary(p).getLength();
                    assertEquals(length, lengths.get(p.getName()).longValue());
                    splitLength += length;
                    String name = p.getName();
                    int index = Integer.parseInt(name.substring(name.lastIndexOf("file_") + 5));
                    assertFalse(bits.get(index));
                    bits.set(index);
                }
                assertEquals(splitLength, split.getLength());
            }
        }
        assertEquals(bits.cardinality(), numFiles);
        fs.delete(dir, true);
    }
    LOG.info("Test Finished");
}

From source file:org.apache.nutch.tools.PruneIndexTool.java

/**
 * For each query, find all matching documents and delete them from all input
 * indexes. Optionally, an additional check can be performed by using {@link PruneChecker}
 * implementations.
 */
public void run() {
    BitSet bits = new BitSet(reader.maxDoc());
    AllHitsCollector ahc = new AllHitsCollector(bits);
    boolean doDelete = false;
    for (int i = 0; i < queries.length; i++) {
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + "Processing query: " + queries[i].toString());
        }
        bits.clear();
        try {
            searcher.search(queries[i], ahc);
        } catch (IOException e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn(dr + " - failed: " + e.getMessage());
            }
            continue;
        }
        if (bits.cardinality() == 0) {
            if (LOG.isInfoEnabled()) {
                LOG.info(dr + " - no matching documents.");
            }
            continue;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + " - found " + bits.cardinality() + " document(s).");
        }
        // Now delete all matching documents
        int docNum = -1, start = 0, cnt = 0;
        // probably faster than looping sequentially through all index values?
        while ((docNum = bits.nextSetBit(start)) != -1) {
            // don't delete the same document multiple times
            if (reader.isDeleted(docNum))
                continue;
            try {
                if (checkers != null && checkers.length > 0) {
                    boolean check = true;
                    for (int k = 0; k < checkers.length; k++) {
                        // fail if any checker returns false
                        check &= checkers[k].isPrunable(queries[i], reader, docNum);
                    }
                    doDelete = check;
                } else
                    doDelete = true;
                if (doDelete) {
                    if (!dryrun)
                        reader.deleteDocument(docNum);
                    cnt++;
                }
            } catch (Exception e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn(dr + " - failed to delete doc #" + docNum);
                }
            }
            start = docNum + 1;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + " - deleted " + cnt + " document(s).");
        }
    }
    // close checkers
    if (checkers != null) {
        for (int i = 0; i < checkers.length; i++) {
            checkers[i].close();
        }
    }
    try {
        reader.close();
    } catch (IOException e) {
        if (LOG.isWarnEnabled()) {
            LOG.warn(dr + "Exception when closing reader(s): " + e.getMessage());
        }
    }
}

From source file:model.DecomposableModel.java

/**
 * Compute the difference in the entropy from this model, to one that would
 * add vertex1 and vertex2 to it
 * 
 * @param a
 * @param b
 * @param computer
 * @return
 */
public int treeWidthIfAdding(Integer a, Integer b) {
    // System.out.println("computing actual entropy");
    BitSet Sab = graph.getSeparator(a, b);
    BitSet Sabuaub = (BitSet) Sab.clone();
    Sabuaub.set(a);
    Sabuaub.set(b);
    return Sabuaub.cardinality();

}