Example usage for java.util.BitSet.clear()

List of usage examples for java.util.BitSet.clear()

Introduction

On this page you can find example usage for java.util.BitSet.clear().

Prototype

public void clear() 

Document

Sets all of the bits in this BitSet to false.
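
A minimal, self-contained sketch of the no-argument clear() alongside its indexed overloads clear(int) and clear(int, int) is shown below; the class name ClearDemo and the chosen bit indexes are illustrative only.

import java.util.BitSet;

public class ClearDemo {

    public static void main(String[] args) {
        BitSet bits = new BitSet();
        bits.set(1);
        bits.set(3);
        bits.set(5);
        System.out.println(bits);            // {1, 3, 5}

        bits.clear(3);                       // clear a single bit
        System.out.println(bits);            // {1, 5}

        bits.clear(0, 2);                    // clear bits in the range [0, 2)
        System.out.println(bits);            // {5}

        bits.clear();                        // set every bit in the set to false
        System.out.println(bits);            // {}
        System.out.println(bits.isEmpty());  // true
    }
}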

Usage

From source file:Main.java

public static void main(String[] args) {

    BitSet bitset1 = new BitSet(8);
    BitSet bitset2 = new BitSet(8);

    // assign values to bitset1
    bitset1.set(0);
    bitset1.set(1);
    bitset1.set(2);
    // assign values to bitset2
    bitset2.set(2);
    bitset2.set(4);
    bitset2.set(6);

    // print the sets
    System.out.println("Bitset1:" + bitset1);
    System.out.println("Bitset2:" + bitset2);

    // clear bitset1
    bitset1.clear();

    // clear bitset2
    bitset2.clear();

    // print the cleared bitsets; both are now empty and print {}
    System.out.println(bitset1);
    System.out.println(bitset2);

}

From source file:org.apache.pig.tools.pigstats.mapreduce.MRScriptState.java

public String getPigFeature(MapReduceOper mro) {
    if (featureMap == null) {
        featureMap = new HashMap<MapReduceOper, String>();
    }

    String retStr = featureMap.get(mro);
    if (retStr == null) {
        BitSet feature = new BitSet();
        feature.clear(); // a new BitSet starts with all bits false; clear() just makes that explicit
        if (mro.isSkewedJoin()) {
            feature.set(PIG_FEATURE.SKEWED_JOIN.ordinal());
        }
        if (mro.isGlobalSort()) {
            feature.set(PIG_FEATURE.ORDER_BY.ordinal());
        }
        if (mro.isSampler()) {
            feature.set(PIG_FEATURE.SAMPLER.ordinal());
        }
        if (mro.isIndexer()) {
            feature.set(PIG_FEATURE.INDEXER.ordinal());
        }
        if (mro.isCogroup()) {
            feature.set(PIG_FEATURE.COGROUP.ordinal());
        }
        if (mro.isGroupBy()) {
            feature.set(PIG_FEATURE.GROUP_BY.ordinal());
        }
        if (mro.isRegularJoin()) {
            feature.set(PIG_FEATURE.HASH_JOIN.ordinal());
        }
        if (mro.needsDistinctCombiner()) {
            feature.set(PIG_FEATURE.DISTINCT.ordinal());
        }
        if (!mro.combinePlan.isEmpty()) {
            feature.set(PIG_FEATURE.COMBINER.ordinal());
        }
        if (mro instanceof NativeMapReduceOper) {
            feature.set(PIG_FEATURE.NATIVE.ordinal());
        } else { // if it is NATIVE MR, don't explore its plans
            try {
                new FeatureVisitor(mro.mapPlan, feature).visit();
                if (mro.reducePlan.isEmpty()) {
                    feature.set(PIG_FEATURE.MAP_ONLY.ordinal());
                } else {
                    new FeatureVisitor(mro.reducePlan, feature).visit();
                }
            } catch (VisitorException e) {
                LOG.warn("Feature visitor failed", e);
            }
        }
        StringBuilder sb = new StringBuilder();
        for (int i = feature.nextSetBit(0); i >= 0; i = feature.nextSetBit(i + 1)) {
            if (sb.length() > 0)
                sb.append(",");
            sb.append(PIG_FEATURE.values()[i].name());
        }
        retStr = sb.toString();
        featureMap.put(mro, retStr);
    }
    return retStr;
}

From source file:org.apache.hadoop.mapred.TestMultiFileInputFormat.java

public void testFormat() throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("Test started");
        LOG.info("Max split count           = " + MAX_SPLIT_COUNT);
        LOG.info("Split count increment     = " + SPLIT_COUNT_INCR);
        LOG.info("Max bytes per file        = " + MAX_BYTES);
        LOG.info("Max number of files       = " + MAX_NUM_FILES);
        LOG.info("Number of files increment = " + NUM_FILES_INCR);
    }

    MultiFileInputFormat<Text, Text> format = new DummyMultiFileInputFormat();
    FileSystem fs = FileSystem.getLocal(job);

    for (int numFiles = 1; numFiles < MAX_NUM_FILES; numFiles += (NUM_FILES_INCR / 2)
            + rand.nextInt(NUM_FILES_INCR / 2)) {

        Path dir = initFiles(fs, numFiles, -1);
        BitSet bits = new BitSet(numFiles);
        for (int i = 1; i < MAX_SPLIT_COUNT; i += rand.nextInt(SPLIT_COUNT_INCR) + 1) {
            LOG.info("Running for Num Files=" + numFiles + ", split count=" + i);

            MultiFileSplit[] splits = (MultiFileSplit[]) format.getSplits(job, i);
            bits.clear();

            for (MultiFileSplit split : splits) {
                long splitLength = 0;
                for (Path p : split.getPaths()) {
                    long length = fs.getContentSummary(p).getLength();
                    assertEquals(length, lengths.get(p.getName()).longValue());
                    splitLength += length;
                    String name = p.getName();
                    int index = Integer.parseInt(name.substring(name.lastIndexOf("file_") + 5));
                    assertFalse(bits.get(index));
                    bits.set(index);
                }
                assertEquals(splitLength, split.getLength());
            }
        }
        assertEquals(bits.cardinality(), numFiles);
        fs.delete(dir, true);
    }
    LOG.info("Test Finished");
}

From source file:hivemall.ftvec.ranking.PopulateNotInUDTF.java

@Override
public void process(Object[] args) throws HiveException {
    Object arg0 = args[0];
    if (arg0 == null || listOI.getListLength(arg0) == 0) {
        populateAll();
        return;
    }

    final BitSet bits;
    if (bitsetInput) {
        long[] longs = HiveUtils.asLongArray(arg0, listOI, listElemOI);
        bits = BitSet.valueOf(longs);
    } else {
        if (_bitset == null) {
            bits = new BitSet();
            this._bitset = bits;
        } else {
            bits = _bitset;
            bits.clear();
        }
        HiveUtils.setBits(arg0, listOI, listElemOI, bits);
    }

    populateItems(bits);
}

From source file:hivemall.ftvec.ranking.ItemPairsSamplingUDTF.java

@Override
public void process(Object[] args) throws HiveException {
    final int numPosItems;
    final BitSet bits;
    if (bitsetInput) {
        if (_rand == null) {
            this._rand = new Random(43);
        }
        long[] longs = HiveUtils.asLongArray(args[0], listOI, listElemOI);
        bits = BitSet.valueOf(longs);
        numPosItems = bits.cardinality();
    } else {
        if (_bitset == null) {
            bits = new BitSet();
            this._bitset = bits;
            this._rand = new Random(43);
        } else {
            bits = _bitset;
            bits.clear();
        }
        numPosItems = HiveUtils.setBits(args[0], listOI, listElemOI, bits);
    }

    if (numPosItems == 0) {
        return;
    }
    final int numNegItems = maxItemId + 1 - numPosItems;
    if (numNegItems == 0) {
        return;
    } else if (numNegItems < 0) {
        throw new UDFArgumentException(
                "maxItemId + 1 - numPosItems = " + maxItemId + " + 1 - " + numPosItems + " = " + numNegItems);
    }

    if (withReplacement) {
        sampleWithReplacement(numPosItems, numNegItems, bits);
    } else {
        sampleWithoutReplacement(numPosItems, numNegItems, bits);
    }
}

From source file:au.org.ala.delta.translation.intkey.IntkeyItemsFileWriter.java

private void writeMultiStateAttributes(IdentificationKeyCharacter character) {

    int charNumber = character.getFilteredCharacterNumber();
    int numStates = character.getNumberOfStates();
    List<BitSet> attributes = new ArrayList<BitSet>();
    Iterator<FilteredItem> items = _dataSet.filteredItems();

    while (items.hasNext()) {
        int itemNum = items.next().getItem().getItemNumber();
        MultiStateAttribute attribute = (MultiStateAttribute) _dataSet.getAttribute(itemNum,
                character.getCharacterNumber());

        List<Integer> states = new ArrayList<Integer>();
        if (attribute.isImplicit()) {
            ControllingInfo controllingInfo = _dataSet.checkApplicability(attribute.getCharacter(),
                    attribute.getItem());
            if (!controllingInfo.isInapplicable()) {
                states = character.getPresentStates(attribute);
            }
        } else {
            states = character.getPresentStates(attribute);
        }

        // Turn into bitset.
        BitSet bits = new BinaryKeyFileEncoder().encodeAttributeStates(states);

        if (isInapplicable(attribute)) {
            if (attribute.isInherited()) {
                bits.clear();
            }
            bits.set(numStates);
        }
        attributes.add(bits);
    }

    _itemsFile.writeAttributeBits(charNumber, attributes, numStates + 1);
}

From source file:org.apache.nutch.tools.PruneIndexTool.java

/**
 * For each query, find all matching documents and delete them from all input
 * indexes. Optionally, an additional check can be performed by using {@link PruneChecker}
 * implementations.
 */
public void run() {
    BitSet bits = new BitSet(reader.maxDoc());
    AllHitsCollector ahc = new AllHitsCollector(bits);
    boolean doDelete = false;
    for (int i = 0; i < queries.length; i++) {
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + "Processing query: " + queries[i].toString());
        }
        bits.clear();
        try {
            searcher.search(queries[i], ahc);
        } catch (IOException e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn(dr + " - failed: " + e.getMessage());
            }
            continue;
        }
        if (bits.cardinality() == 0) {
            if (LOG.isInfoEnabled()) {
                LOG.info(dr + " - no matching documents.");
            }
            continue;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + " - found " + bits.cardinality() + " document(s).");
        }
        // Now delete all matching documents
        int docNum = -1, start = 0, cnt = 0;
        // probably faster than looping sequentially through all index values?
        while ((docNum = bits.nextSetBit(start)) != -1) {
            // don't delete the same document multiple times
            if (reader.isDeleted(docNum))
                continue;
            try {
                if (checkers != null && checkers.length > 0) {
                    boolean check = true;
                    for (int k = 0; k < checkers.length; k++) {
                        // fail if any checker returns false
                        check &= checkers[k].isPrunable(queries[i], reader, docNum);
                    }
                    doDelete = check;
                } else
                    doDelete = true;
                if (doDelete) {
                    if (!dryrun)
                        reader.deleteDocument(docNum);
                    cnt++;
                }
            } catch (Exception e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn(dr + " - failed to delete doc #" + docNum);
                }
            }
            start = docNum + 1;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + " - deleted " + cnt + " document(s).");
        }
    }
    // close checkers
    if (checkers != null) {
        for (int i = 0; i < checkers.length; i++) {
            checkers[i].close();
        }
    }
    try {
        reader.close();
    } catch (IOException e) {
        if (LOG.isWarnEnabled()) {
            LOG.warn(dr + "Exception when closing reader(s): " + e.getMessage());
        }
    }
}

From source file:edu.brown.benchmark.seats.SEATSClient.java

protected final void clearCache() {
    for (BitSet seats : CACHE_BOOKED_SEATS.values()) {
        seats.clear();
    } // FOR
    for (Buffer<Reservation> queue : CACHE_RESERVATIONS.values()) {
        queue.clear();
    } // FOR
    for (Set<Long> f_ids : CACHE_CUSTOMER_BOOKED_FLIGHTS.values()) {
        synchronized (f_ids) {
            f_ids.clear();
        } // SYNCH
    } // FOR
}

From source file:org.apache.lucene.index.collocations.CollocationExtractor.java

/**
 * Called for every term in the index
 * docsAndPositions, possible speed up by http://lucene.apache.org/core/4_2_0/core/org/apache/lucene/index/TermsEnum.html
 * http://stackoverflow.com/questions/15771843/get-word-position-in-document-with-lucene
 * Migration Guide: http://lucene.apache.org/core/4_8_1/MIGRATE.html
 * http://stackoverflow.com/questions/15370652/retrieving-all-term-positions-from-docsandpositionsenum
 * @param bytesRef
 * @param logger
 * @param slop
 * @throws IOException
 */
void processTerm(BytesRef bytesRef, CollocationIndexer logger, int slop) throws IOException {
    Term term = new Term(this.fieldName, bytesRef);
    if (!filter.processTerm(term.text())) {
        return;
    }
    System.out.println("Processing term: " + term);
    // TermEnum te = reader.terms(term);
    // int numDocsForTerm = Math.min(te.docFreq(), maxNumDocsToAnalyze);
    int numDocsForTerm = Math.min(this.reader.docFreq(term), maxNumDocsToAnalyze);
    int totalNumDocs = reader.numDocs();
    float percent = (float) numDocsForTerm / (float) totalNumDocs;

    isTermTooPopularOrNotPopularEnough(term, percent);

    // get a list of all the docs with this term
    // Apache Lucene Migration Guide
    // TermDocs td = reader.termDocs(term);
    // get dpe in first hand
    DocsAndPositionsEnum dpe = MultiFields.getTermPositionsEnum(this.reader, null, this.fieldName, bytesRef);
    HashMap<String, CollocationScorer> phraseTerms = new HashMap<String, CollocationScorer>();
    int MAX_TERMS_PER_DOC = 100000;
    BitSet termPos = new BitSet(MAX_TERMS_PER_DOC);

    int numDocsAnalyzed = 0;
    // for all docs that contain this term
    int docSeq;
    while ((docSeq = dpe.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
        int docId = dpe.docID();
        // System.out.println("Processing docId: "+docId);
        numDocsAnalyzed++;
        if (numDocsAnalyzed > maxNumDocsToAnalyze) {
            break;
        }
        // get TermPositions for matching doc
        // TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(docId, fieldName);
        // String[] terms_str = tpv.getTerms();
        Terms tv = this.reader.getTermVector(docId, this.fieldName);
        TermsEnum te = tv.iterator(null);
        // TODO refactor iteration

        List<String> terms_list = new ArrayList<>();
        while (te.next() != null) {
            terms_list.add(te.term().utf8ToString());
        }
        String[] terms_str = terms_list.toArray(new String[terms_list.size()]);
        // System.out.println("terms_str: "+Arrays.toString(terms_str));
        termPos.clear();
        int index = recordAllPositionsOfTheTermInCurrentDocumentBitset(docSeq, term, termPos, tv, terms_str);

        // now look at all OTHER terms_str in this doc and see if they are
        // positioned in a pre-defined sized window around the current term
        /*
        for (int j = 0; j < terms_str.length; j++) {
        if (j == index) { // (item A)
           continue;
        }
        if (!filter.processTerm(terms_str[j])) {
           continue;
        }
        if (!StringUtils.isAlpha(terms_str[j])) {
            continue;
        }
        // sequential code
        boolean matchFound = false;
        for (int k = 0; ((k < dpe.freq()) && (!matchFound)); k++) {
            try {
                // inefficient
                // iterate through all other items (item B)
                Integer position = dpe.nextPosition();
                Integer startpos = Math.max(0, position - slop);
                Integer endpos = position + slop;
                matchFound = populateHashMapWithPhraseTerms(term,
                        numDocsForTerm, totalNumDocs, phraseTerms, termPos,
                        terms_str, j, matchFound, startpos, endpos);
            }
            catch (ArrayIndexOutOfBoundsException e) {
                e.printStackTrace();
                break;
            }
            catch (IOException e) {
                e.printStackTrace();
                break;
            }
                
        }
        }
        */

        ///
        boolean[] matchFound = new boolean[terms_str.length]; // single match is sufficient, no duplicate process
        for (int j = 0; j < matchFound.length; j++)
            matchFound[j] = false;

        for (int k = 0; (k < dpe.freq()); k++) {
            Integer position = dpe.nextPosition();
            Integer startpos = Math.max(0, position - slop);
            Integer endpos = position + slop;
            for (int j = 0; j < terms_str.length && !matchFound[j]; j++) {
                if (j == index) { // (item A)
                    continue;
                }
                if (!filter.processTerm(terms_str[j])) {
                    continue;
                }
                if (!StringUtils.isAlpha(terms_str[j])) {
                    continue;
                }
                // inefficient
                // iterate through all other items (item B)
                populateHashMapWithPhraseTerms(term, numDocsForTerm, totalNumDocs, phraseTerms, termPos,
                        terms_str, j, matchFound, startpos, endpos);
            }

        }
    } // end docs loop

    sortTopTermsAndAddToCollocationsIndexForThisTerm(logger, phraseTerms);
}

From source file:hivemall.smile.classification.GradientTreeBoostingClassifierUDTF.java

private void train2(@Nonnull final double[][] x, @Nonnull final int[] y) throws HiveException {
    final int numVars = SmileExtUtils.computeNumInputVars(_numVars, x);
    if (logger.isInfoEnabled()) {
        logger.info("k: " + 2 + ", numTrees: " + _numTrees + ", shrinkage: " + _eta + ", subsample: "
                + _subsample + ", numVars: " + numVars + ", maxDepth: " + _maxDepth + ", minSamplesSplit: "
                + _minSamplesSplit + ", maxLeafs: " + _maxLeafNodes + ", seed: " + _seed);
    }

    final int numInstances = x.length;
    final int numSamples = (int) Math.round(numInstances * _subsample);

    final double[] h = new double[numInstances]; // current F(x_i)
    final double[] response = new double[numInstances]; // response variable for regression tree.

    final double mu = smile.math.Math.mean(y);
    final double intercept = 0.5d * Math.log((1.d + mu) / (1.d - mu));

    for (int i = 0; i < numInstances; i++) {
        h[i] = intercept;
    }

    final int[][] order = SmileExtUtils.sort(_attributes, x);
    final RegressionTree.NodeOutput output = new L2NodeOutput(response);

    final BitSet sampled = new BitSet(numInstances);
    final int[] bag = new int[numSamples];
    final int[] perm = new int[numSamples];
    for (int i = 0; i < numSamples; i++) {
        perm[i] = i;
    }

    long s = (this._seed == -1L) ? SmileExtUtils.generateSeed() : new smile.math.Random(_seed).nextLong();
    final smile.math.Random rnd1 = new smile.math.Random(s);
    final smile.math.Random rnd2 = new smile.math.Random(rnd1.nextLong());

    for (int m = 0; m < _numTrees; m++) {
        reportProgress(_progressReporter);

        SmileExtUtils.shuffle(perm, rnd1);
        for (int i = 0; i < numSamples; i++) {
            int index = perm[i];
            bag[i] = index;
            sampled.set(index);
        }

        for (int i = 0; i < numInstances; i++) {
            response[i] = 2.0d * y[i] / (1.d + Math.exp(2.d * y[i] * h[i]));
        }

        RegressionTree tree = new RegressionTree(_attributes, x, response, numVars, _maxDepth, _maxLeafNodes,
                _minSamplesSplit, _minSamplesLeaf, order, bag, output, rnd2);

        for (int i = 0; i < numInstances; i++) {
            h[i] += _eta * tree.predict(x[i]);
        }

        // out-of-bag error estimate
        int oobTests = 0, oobErrors = 0;
        for (int i = sampled.nextClearBit(0); i < numInstances; i = sampled.nextClearBit(i + 1)) {
            oobTests++;
            final int pred = (h[i] > 0.d) ? 1 : 0;
            if (pred != y[i]) {
                oobErrors++;
            }
        }
        float oobErrorRate = 0.f;
        if (oobTests > 0) {
            oobErrorRate = ((float) oobErrors) / oobTests;
        }

        forward(m + 1, intercept, _eta, oobErrorRate, tree);
        sampled.clear();
    }
}