List of usage examples for java.util.BitSet.clear()
public void clear()
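BitSet.clear() with no arguments sets every bit in the set to false. As the examples below show, it is typically called to reuse one BitSet across loop iterations instead of allocating a fresh one each time. A minimal, self-contained sketch of that reuse pattern (the class name and the index batches are made up for illustration):

import java.util.BitSet;

public class BitSetClearDemo {
    public static void main(String[] args) {
        // Hypothetical batches of bit indices; any source of ints works here.
        int[][] batches = { { 1, 3, 5 }, { 0, 2 } };

        BitSet seen = new BitSet(8);
        for (int[] batch : batches) {
            seen.clear(); // reset every bit to false before reusing the set
            for (int idx : batch) {
                seen.set(idx);
            }
            System.out.println("bits=" + seen + ", cardinality=" + seen.cardinality());
        }
    }
}

This would print {1, 3, 5} with cardinality 3, then {0, 2} with cardinality 2, since each clear() wipes the bits set for the previous batch.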
From source file:Main.java
public static void main(String[] args) {
    BitSet bitset1 = new BitSet(8);
    BitSet bitset2 = new BitSet(8);

    // assign values to bitset1
    bitset1.set(0);
    bitset1.set(1);
    bitset1.set(2);

    // assign values to bitset2
    bitset2.set(2);
    bitset2.set(4);
    bitset2.set(6);

    // print the sets
    System.out.println("Bitset1:" + bitset1);
    System.out.println("Bitset2:" + bitset2);

    // clear bitset1
    bitset1.clear();

    // clear bitset2
    bitset2.clear();

    // print new bitsets
    System.out.println(bitset1);
    System.out.println(bitset2);
}
From source file:org.apache.pig.tools.pigstats.mapreduce.MRScriptState.java
public String getPigFeature(MapReduceOper mro) {
    if (featureMap == null) {
        featureMap = new HashMap<MapReduceOper, String>();
    }

    String retStr = featureMap.get(mro);
    if (retStr == null) {
        BitSet feature = new BitSet();
        feature.clear();
        if (mro.isSkewedJoin()) {
            feature.set(PIG_FEATURE.SKEWED_JOIN.ordinal());
        }
        if (mro.isGlobalSort()) {
            feature.set(PIG_FEATURE.ORDER_BY.ordinal());
        }
        if (mro.isSampler()) {
            feature.set(PIG_FEATURE.SAMPLER.ordinal());
        }
        if (mro.isIndexer()) {
            feature.set(PIG_FEATURE.INDEXER.ordinal());
        }
        if (mro.isCogroup()) {
            feature.set(PIG_FEATURE.COGROUP.ordinal());
        }
        if (mro.isGroupBy()) {
            feature.set(PIG_FEATURE.GROUP_BY.ordinal());
        }
        if (mro.isRegularJoin()) {
            feature.set(PIG_FEATURE.HASH_JOIN.ordinal());
        }
        if (mro.needsDistinctCombiner()) {
            feature.set(PIG_FEATURE.DISTINCT.ordinal());
        }
        if (!mro.combinePlan.isEmpty()) {
            feature.set(PIG_FEATURE.COMBINER.ordinal());
        }
        if (mro instanceof NativeMapReduceOper) {
            feature.set(PIG_FEATURE.NATIVE.ordinal());
        } else { // if it is NATIVE MR, don't explore its plans
            try {
                new FeatureVisitor(mro.mapPlan, feature).visit();
                if (mro.reducePlan.isEmpty()) {
                    feature.set(PIG_FEATURE.MAP_ONLY.ordinal());
                } else {
                    new FeatureVisitor(mro.reducePlan, feature).visit();
                }
            } catch (VisitorException e) {
                LOG.warn("Feature visitor failed", e);
            }
        }
        StringBuilder sb = new StringBuilder();
        for (int i = feature.nextSetBit(0); i >= 0; i = feature.nextSetBit(i + 1)) {
            if (sb.length() > 0) {
                sb.append(",");
            }
            sb.append(PIG_FEATURE.values()[i].name());
        }
        retStr = sb.toString();
        featureMap.put(mro, retStr);
    }
    return retStr;
}
From source file:org.apache.hadoop.mapred.TestMultiFileInputFormat.java
public void testFormat() throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("Test started");
        LOG.info("Max split count = " + MAX_SPLIT_COUNT);
        LOG.info("Split count increment = " + SPLIT_COUNT_INCR);
        LOG.info("Max bytes per file = " + MAX_BYTES);
        LOG.info("Max number of files = " + MAX_NUM_FILES);
        LOG.info("Number of files increment = " + NUM_FILES_INCR);
    }

    MultiFileInputFormat<Text, Text> format = new DummyMultiFileInputFormat();
    FileSystem fs = FileSystem.getLocal(job);

    for (int numFiles = 1; numFiles < MAX_NUM_FILES;
            numFiles += (NUM_FILES_INCR / 2) + rand.nextInt(NUM_FILES_INCR / 2)) {
        Path dir = initFiles(fs, numFiles, -1);
        BitSet bits = new BitSet(numFiles);
        for (int i = 1; i < MAX_SPLIT_COUNT; i += rand.nextInt(SPLIT_COUNT_INCR) + 1) {
            LOG.info("Running for Num Files=" + numFiles + ", split count=" + i);
            MultiFileSplit[] splits = (MultiFileSplit[]) format.getSplits(job, i);
            bits.clear();
            for (MultiFileSplit split : splits) {
                long splitLength = 0;
                for (Path p : split.getPaths()) {
                    long length = fs.getContentSummary(p).getLength();
                    assertEquals(length, lengths.get(p.getName()).longValue());
                    splitLength += length;
                    String name = p.getName();
                    int index = Integer.parseInt(name.substring(name.lastIndexOf("file_") + 5));
                    assertFalse(bits.get(index));
                    bits.set(index);
                }
                assertEquals(splitLength, split.getLength());
            }
        }
        assertEquals(bits.cardinality(), numFiles);
        fs.delete(dir, true);
    }
    LOG.info("Test Finished");
}
From source file:hivemall.ftvec.ranking.PopulateNotInUDTF.java
@Override
public void process(Object[] args) throws HiveException {
    Object arg0 = args[0];
    if (arg0 == null || listOI.getListLength(arg0) == 0) {
        populateAll();
        return;
    }

    final BitSet bits;
    if (bitsetInput) {
        long[] longs = HiveUtils.asLongArray(arg0, listOI, listElemOI);
        bits = BitSet.valueOf(longs);
    } else {
        if (_bitset == null) {
            bits = new BitSet();
            this._bitset = bits;
        } else {
            bits = _bitset;
            bits.clear();
        }
        HiveUtils.setBits(arg0, listOI, listElemOI, bits);
    }

    populateItems(bits);
}
From source file:hivemall.ftvec.ranking.ItemPairsSamplingUDTF.java
@Override
public void process(Object[] args) throws HiveException {
    final int numPosItems;
    final BitSet bits;
    if (bitsetInput) {
        if (_rand == null) {
            this._rand = new Random(43);
        }
        long[] longs = HiveUtils.asLongArray(args[0], listOI, listElemOI);
        bits = BitSet.valueOf(longs);
        numPosItems = bits.cardinality();
    } else {
        if (_bitset == null) {
            bits = new BitSet();
            this._bitset = bits;
            this._rand = new Random(43);
        } else {
            bits = _bitset;
            bits.clear();
        }
        numPosItems = HiveUtils.setBits(args[0], listOI, listElemOI, bits);
    }

    if (numPosItems == 0) {
        return;
    }
    final int numNegItems = maxItemId + 1 - numPosItems;
    if (numNegItems == 0) {
        return;
    } else if (numNegItems < 0) {
        throw new UDFArgumentException("maxItemId + 1 - numPosItems = " + maxItemId + " + 1 - "
                + numPosItems + " = " + numNegItems);
    }

    if (withReplacement) {
        sampleWithReplacement(numPosItems, numNegItems, bits);
    } else {
        sampleWithoutReplacement(numPosItems, numNegItems, bits);
    }
}
From source file:au.org.ala.delta.translation.intkey.IntkeyItemsFileWriter.java
private void writeMultiStateAttributes(IdentificationKeyCharacter character) {
    int charNumber = character.getFilteredCharacterNumber();
    int numStates = character.getNumberOfStates();
    List<BitSet> attributes = new ArrayList<BitSet>();
    Iterator<FilteredItem> items = _dataSet.filteredItems();
    while (items.hasNext()) {
        int itemNum = items.next().getItem().getItemNumber();
        MultiStateAttribute attribute = (MultiStateAttribute) _dataSet.getAttribute(itemNum,
                character.getCharacterNumber());

        List<Integer> states = new ArrayList<Integer>();
        if (attribute.isImplicit()) {
            ControllingInfo controllingInfo = _dataSet.checkApplicability(attribute.getCharacter(),
                    attribute.getItem());
            if (!controllingInfo.isInapplicable()) {
                states = character.getPresentStates(attribute);
            }
        } else {
            states = character.getPresentStates(attribute);
        }

        // Turn into bitset.
        BitSet bits = new BinaryKeyFileEncoder().encodeAttributeStates(states);

        if (isInapplicable(attribute)) {
            if (attribute.isInherited()) {
                bits.clear();
            }
            bits.set(numStates);
        }
        attributes.add(bits);
    }
    _itemsFile.writeAttributeBits(charNumber, attributes, numStates + 1);
}
From source file:org.apache.nutch.tools.PruneIndexTool.java
/**
 * For each query, find all matching documents and delete them from all input
 * indexes. Optionally, an additional check can be performed by using {@link PruneChecker}
 * implementations.
 */
public void run() {
    BitSet bits = new BitSet(reader.maxDoc());
    AllHitsCollector ahc = new AllHitsCollector(bits);
    boolean doDelete = false;
    for (int i = 0; i < queries.length; i++) {
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + "Processing query: " + queries[i].toString());
        }
        bits.clear();
        try {
            searcher.search(queries[i], ahc);
        } catch (IOException e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn(dr + " - failed: " + e.getMessage());
            }
            continue;
        }
        if (bits.cardinality() == 0) {
            if (LOG.isInfoEnabled()) {
                LOG.info(dr + " - no matching documents.");
            }
            continue;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + " - found " + bits.cardinality() + " document(s).");
        }
        // Now delete all matching documents
        int docNum = -1, start = 0, cnt = 0;
        // probably faster than looping sequentially through all index values?
        while ((docNum = bits.nextSetBit(start)) != -1) {
            // don't delete the same document multiple times
            if (reader.isDeleted(docNum))
                continue;
            try {
                if (checkers != null && checkers.length > 0) {
                    boolean check = true;
                    for (int k = 0; k < checkers.length; k++) {
                        // fail if any checker returns false
                        check &= checkers[k].isPrunable(queries[i], reader, docNum);
                    }
                    doDelete = check;
                } else
                    doDelete = true;
                if (doDelete) {
                    if (!dryrun)
                        reader.deleteDocument(docNum);
                    cnt++;
                }
            } catch (Exception e) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn(dr + " - failed to delete doc #" + docNum);
                }
            }
            start = docNum + 1;
        }
        if (LOG.isInfoEnabled()) {
            LOG.info(dr + " - deleted " + cnt + " document(s).");
        }
    }
    // close checkers
    if (checkers != null) {
        for (int i = 0; i < checkers.length; i++) {
            checkers[i].close();
        }
    }
    try {
        reader.close();
    } catch (IOException e) {
        if (LOG.isWarnEnabled()) {
            LOG.warn(dr + "Exception when closing reader(s): " + e.getMessage());
        }
    }
}
From source file:edu.brown.benchmark.seats.SEATSClient.java
protected final void clearCache() {
    for (BitSet seats : CACHE_BOOKED_SEATS.values()) {
        seats.clear();
    } // FOR
    for (Buffer<Reservation> queue : CACHE_RESERVATIONS.values()) {
        queue.clear();
    } // FOR
    for (Set<Long> f_ids : CACHE_CUSTOMER_BOOKED_FLIGHTS.values()) {
        synchronized (f_ids) {
            f_ids.clear();
        } // SYNCH
    } // FOR
}
From source file:org.apache.lucene.index.collocations.CollocationExtractor.java
/**
 * Called for every term in the index.
 * docsAndPositions, possible speed up by http://lucene.apache.org/core/4_2_0/core/org/apache/lucene/index/TermsEnum.html
 * http://stackoverflow.com/questions/15771843/get-word-position-in-document-with-lucene
 * Migration Guide: http://lucene.apache.org/core/4_8_1/MIGRATE.html
 * http://stackoverflow.com/questions/15370652/retrieving-all-term-positions-from-docsandpositionsenum
 * @param bytesRef
 * @param logger
 * @param slop
 * @throws IOException
 */
void processTerm(BytesRef bytesRef, CollocationIndexer logger, int slop) throws IOException {
    Term term = new Term(this.fieldName, bytesRef);
    if (!filter.processTerm(term.text())) {
        return;
    }
    System.out.println("Processing term: " + term);

    // TermEnum te = reader.terms(term);
    // int numDocsForTerm = Math.min(te.docFreq(), maxNumDocsToAnalyze);
    int numDocsForTerm = Math.min(this.reader.docFreq(term), maxNumDocsToAnalyze);
    int totalNumDocs = reader.numDocs();
    float percent = (float) numDocsForTerm / (float) totalNumDocs;
    isTermTooPopularOrNotPopularEnough(term, percent);

    // get a list of all the docs with this term
    // Apache Lucene Migration Guide
    // TermDocs td = reader.termDocs(term);
    // get dpe in first hand
    DocsAndPositionsEnum dpe = MultiFields.getTermPositionsEnum(this.reader, null, this.fieldName, bytesRef);
    HashMap<String, CollocationScorer> phraseTerms = new HashMap<String, CollocationScorer>();
    int MAX_TERMS_PER_DOC = 100000;
    BitSet termPos = new BitSet(MAX_TERMS_PER_DOC);

    int numDocsAnalyzed = 0;
    // for all docs that contain this term
    int docSeq;
    while ((docSeq = dpe.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
        int docId = dpe.docID();
        // System.out.println("Processing docId: " + docId);
        numDocsAnalyzed++;
        if (numDocsAnalyzed > maxNumDocsToAnalyze) {
            break;
        }

        // get TermPositions for matching doc
        // TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(docId, fieldName);
        // String[] terms_str = tpv.getTerms();
        Terms tv = this.reader.getTermVector(docId, this.fieldName);
        TermsEnum te = tv.iterator(null);
        // TODO refactor iteration
        List<String> terms_list = new ArrayList<>();
        while (te.next() != null) {
            terms_list.add(te.term().utf8ToString());
        }
        String[] terms_str = terms_list.toArray(new String[terms_list.size()]);
        // System.out.println("terms_str: " + Arrays.toString(terms_str));

        termPos.clear();
        int index = recordAllPositionsOfTheTermInCurrentDocumentBitset(docSeq, term, termPos, tv, terms_str);

        // now look at all OTHER terms_str in this doc and see if they are
        // positioned in a pre-defined sized window around the current term
        /*
        for (int j = 0; j < terms_str.length; j++) {
            if (j == index) { // (item A)
                continue;
            }
            if (!filter.processTerm(terms_str[j])) {
                continue;
            }
            if (!StringUtils.isAlpha(terms_str[j])) {
                continue;
            }
            // sequential code
            boolean matchFound = false;
            for (int k = 0; ((k < dpe.freq()) && (!matchFound)); k++) {
                try {
                    // inefficient
                    // iterate through all other items (item B)
                    Integer position = dpe.nextPosition();
                    Integer startpos = Math.max(0, position - slop);
                    Integer endpos = position + slop;
                    matchFound = populateHashMapWithPhraseTerms(term, numDocsForTerm, totalNumDocs,
                            phraseTerms, termPos, terms_str, j, matchFound, startpos, endpos);
                } catch (ArrayIndexOutOfBoundsException e) {
                    e.printStackTrace();
                    break;
                } catch (IOException e) {
                    e.printStackTrace();
                    break;
                }
            }
        }
        */

        boolean[] matchFound = new boolean[terms_str.length]; // single match is sufficient, no duplicate process
        for (int j = 0; j < matchFound.length; j++)
            matchFound[j] = false;
        for (int k = 0; (k < dpe.freq()); k++) {
            Integer position = dpe.nextPosition();
            Integer startpos = Math.max(0, position - slop);
            Integer endpos = position + slop;
            for (int j = 0; j < terms_str.length && !matchFound[j]; j++) {
                if (j == index) { // (item A)
                    continue;
                }
                if (!filter.processTerm(terms_str[j])) {
                    continue;
                }
                if (!StringUtils.isAlpha(terms_str[j])) {
                    continue;
                }
                // inefficient
                // iterate through all other items (item B)
                populateHashMapWithPhraseTerms(term, numDocsForTerm, totalNumDocs, phraseTerms, termPos,
                        terms_str, j, matchFound, startpos, endpos);
            }
        }
    } // end docs loop

    sortTopTermsAndAddToCollocationsIndexForThisTerm(logger, phraseTerms);
}
From source file:hivemall.smile.classification.GradientTreeBoostingClassifierUDTF.java
private void train2(@Nonnull final double[][] x, @Nonnull final int[] y) throws HiveException {
    final int numVars = SmileExtUtils.computeNumInputVars(_numVars, x);
    if (logger.isInfoEnabled()) {
        logger.info("k: " + 2 + ", numTrees: " + _numTrees + ", shirinkage: " + _eta + ", subsample: "
                + _subsample + ", numVars: " + numVars + ", maxDepth: " + _maxDepth + ", minSamplesSplit: "
                + _minSamplesSplit + ", maxLeafs: " + _maxLeafNodes + ", seed: " + _seed);
    }

    final int numInstances = x.length;
    final int numSamples = (int) Math.round(numInstances * _subsample);

    final double[] h = new double[numInstances]; // current F(x_i)
    final double[] response = new double[numInstances]; // response variable for regression tree.

    final double mu = smile.math.Math.mean(y);
    final double intercept = 0.5d * Math.log((1.d + mu) / (1.d - mu));

    for (int i = 0; i < numInstances; i++) {
        h[i] = intercept;
    }

    final int[][] order = SmileExtUtils.sort(_attributes, x);
    final RegressionTree.NodeOutput output = new L2NodeOutput(response);

    final BitSet sampled = new BitSet(numInstances);
    final int[] bag = new int[numSamples];
    final int[] perm = new int[numSamples];
    for (int i = 0; i < numSamples; i++) {
        perm[i] = i;
    }

    long s = (this._seed == -1L) ? SmileExtUtils.generateSeed() : new smile.math.Random(_seed).nextLong();
    final smile.math.Random rnd1 = new smile.math.Random(s);
    final smile.math.Random rnd2 = new smile.math.Random(rnd1.nextLong());

    for (int m = 0; m < _numTrees; m++) {
        reportProgress(_progressReporter);

        SmileExtUtils.shuffle(perm, rnd1);
        for (int i = 0; i < numSamples; i++) {
            int index = perm[i];
            bag[i] = index;
            sampled.set(index);
        }

        for (int i = 0; i < numInstances; i++) {
            response[i] = 2.0d * y[i] / (1.d + Math.exp(2.d * y[i] * h[i]));
        }

        RegressionTree tree = new RegressionTree(_attributes, x, response, numVars, _maxDepth, _maxLeafNodes,
                _minSamplesSplit, _minSamplesLeaf, order, bag, output, rnd2);

        for (int i = 0; i < numInstances; i++) {
            h[i] += _eta * tree.predict(x[i]);
        }

        // out-of-bag error estimate
        int oobTests = 0, oobErrors = 0;
        for (int i = sampled.nextClearBit(0); i < numInstances; i = sampled.nextClearBit(i + 1)) {
            oobTests++;
            final int pred = (h[i] > 0.d) ? 1 : 0;
            if (pred != y[i]) {
                oobErrors++;
            }
        }
        float oobErrorRate = 0.f;
        if (oobTests > 0) {
            oobErrorRate = ((float) oobErrors) / oobTests;
        }

        forward(m + 1, intercept, _eta, oobErrorRate, tree);

        sampled.clear();
    }
}