List of usage examples for java.util.BitSet.set
public void set(int bitIndex)
From source file:au.org.ala.delta.translation.intkey.IntkeyItemsFileWriter.java
private Set<Float> writeRealAttributes(int filteredCharNumber, Character realChar, boolean wasInteger) { int unfilteredCharNumber = realChar.getCharacterId(); boolean useNormalValues = _context.getUseNormalValues(unfilteredCharNumber); List<FloatRange> values = new ArrayList<FloatRange>(); BitSet inapplicableBits = new BitSet(); Iterator<FilteredItem> items = _dataSet.filteredItems(); while (items.hasNext()) { FilteredItem item = items.next(); NumericAttribute attribute = (NumericAttribute) _dataSet.getAttribute(item.getItem().getItemNumber(), unfilteredCharNumber);//from w w w . ja v a2s . c o m if (attribute == null || attribute.isCodedUnknown() || attribute.isInapplicable() || attribute.isVariable()) { FloatRange range = new FloatRange(Float.MAX_VALUE); values.add(range); if (isInapplicable(attribute)) { inapplicableBits.set(item.getItemNumber() - 1); } continue; } List<NumericRange> ranges = attribute.getNumericValue(); // This can happen if the attribute has a comment but no value. 
if (ranges.isEmpty()) { FloatRange range = new FloatRange(-Float.MAX_VALUE); values.add(range); if (isInapplicable(attribute)) { inapplicableBits.set(item.getItemNumber() - 1); } continue; } Range useRange; float min = Float.MAX_VALUE; float max = -Float.MAX_VALUE; for (NumericRange range : ranges) { if (_context.hasAbsoluteError(unfilteredCharNumber)) { range.setAbsoluteError(_context.getAbsoluteError(unfilteredCharNumber)); } else if (_context.hasPercentageError(unfilteredCharNumber)) { range.setPercentageError(_context.getPercentageError(unfilteredCharNumber)); } if (useNormalValues) { useRange = range.getNormalRange(); } else { useRange = range.getFullRange(); } min = Math.min(min, useRange.getMinimumFloat()); max = Math.max(max, useRange.getMaximumFloat()); } FloatRange floatRange = new FloatRange(min, max); values.add(floatRange); } Set<Float> floats = new HashSet<Float>(); for (FloatRange range : values) { if (range.getMinimumFloat() != Float.MAX_VALUE && range.getMinimumFloat() != -Float.MAX_VALUE) { floats.add(range.getMinimumFloat()); } else { if (range.getMinimumFloat() == -Float.MAX_VALUE && !wasInteger) { floats.add(0f); // For CONFOR compatibility, seems wrong. } } if (range.getMaximumFloat() != Float.MAX_VALUE && range.getMinimumFloat() != -Float.MAX_VALUE) { floats.add(range.getMaximumFloat()); } else { if (range.getMinimumFloat() == -Float.MAX_VALUE && !wasInteger) { floats.add(1.0f); // For CONFOR compatibility, seems wrong. } } } List<Float> boundaries = new ArrayList<Float>(floats); Collections.sort(boundaries); _itemsFile.writeAttributeFloats(filteredCharNumber, inapplicableBits, values, boundaries); return floats; }
From source file:org.onosproject.tetopology.management.impl.DistributedTeTopologyStore.java
/**
 * Stores the given network together with its nodes and links.
 *
 * <p>Every node and link of the network is passed to {@code updateNetworkNode} /
 * {@code updateNetworkLink}. When the network is not yet present in {@code networkMap} and it
 * carries a TE topology id, a new TE topology entry is created first. Finally the network itself
 * is (re)written into {@code networkMap}.
 *
 * @param network the network to store
 */
@Override
public void updateNetwork(Network network) {
    InternalNetwork curNetwork = networkMap.get(network.networkId());
    // topoKey stays null for a network that has no TE topology id.
    TeTopologyKey topoKey = null;
    if (network.teTopologyId() != null) {
        topoKey = newTeTopologyKey(network.teTopologyId());
    }

    // Update TE nodes
    List<TeNodeKey> teNodeKeys = null;
    if (MapUtils.isNotEmpty(network.nodes())) {
        teNodeKeys = Lists.newArrayList();
        for (Map.Entry<KeyId, NetworkNode> entry : network.nodes().entrySet()) {
            NetworkNodeKey nodeKey = new NetworkNodeKey(network.networkId(), entry.getKey());
            TeNodeKey teNodeKey = null;
            if (topoKey != null && entry.getValue().teNode() != null) {
                teNodeKey = new TeNodeKey(topoKey, entry.getValue().teNode().teNodeId());
            }
            updateNetworkNode(nodeKey, entry.getValue(), true, false, teNodeKey);
            // Note: a null key is appended for a node without TE data.
            teNodeKeys.add(teNodeKey);
        }
    }

    // Update TE links
    List<TeLinkTpGlobalKey> teLinkKeys = null;
    if (MapUtils.isNotEmpty(network.links())) {
        teLinkKeys = Lists.newArrayList();
        for (Map.Entry<KeyId, NetworkLink> entry : network.links().entrySet()) {
            NetworkLinkKey linkKey = new NetworkLinkKey(network.networkId(), entry.getKey());
            TeLinkTpGlobalKey teLinkKey = null;
            if (topoKey != null && entry.getValue().teLink() != null) {
                teLinkKey = new TeLinkTpGlobalKey(topoKey, entry.getValue().teLink().teLinkKey());
            }
            updateNetworkLink(linkKey, entry.getValue(), true, false, teLinkKey);
            // Note: a null key is appended for a link without TE data.
            teLinkKeys.add(teLinkKey);
        }
    }

    // New network, update TE Topology first. A network without a TE topology id has no TE
    // topology to create; dereferencing teTopologyId() here would throw and a null key would be
    // written into teTopologyMap, so that case is skipped.
    if (curNetwork == null && topoKey != null) {
        InternalTeTopology intTopo = new InternalTeTopology(network.teTopologyId().topologyId());
        intTopo.setTeNodeKeys(teNodeKeys);
        intTopo.setTeLinkKeys(teLinkKeys);
        BitSet flags = new BitSet(TeConstants.FLAG_MAX_BITS);
        flags.set(TeTopology.BIT_LEARNT);
        if (network.teTopologyId().clientId() == TeTopologyManager.DEFAULT_PROVIDER_ID) {
            // Hard rule for now
            flags.set(TeTopology.BIT_CUSTOMIZED);
        }
        CommonTopologyData common = new CommonTopologyData(network.networkId(),
                OptimizationType.NOT_OPTIMIZED, flags, network.ownerId());
        intTopo.setTopologydata(common);
        teTopologyMap.put(topoKey, intTopo);
    }

    // Finally Update networkMap
    InternalNetwork newNetwork = new InternalNetwork(network);
    newNetwork.setTeTopologyKey(topoKey);
    networkMap.put(network.networkId(), newNetwork);
}
From source file:org.apache.tez.dag.utils.JavaProfilerOptions.java
/** * Get the set of tasks to be profiled within a vertex * * @param tasksToProfileInVertex//w w w . j a va2 s . co m * @return Set<Integer> containing the task indexes to be profiled */ private BitSet parseTasksToProfile(String tasksToProfileInVertex) { BitSet profiledTaskSet = new BitSet(); if (Strings.isNullOrEmpty(tasksToProfileInVertex)) { return profiledTaskSet; } Iterable<String> tasksInVertex = Splitter.on(",").omitEmptyStrings().trimResults() .split(tasksToProfileInVertex); for (String task : tasksInVertex) { /** * TODO: this is horrible way to check the ranges. * Should use RangeSet when guava is upgraded. Also, need to support partial * ranges like "1:", ":50". With current implementation partial ranges are not * allowed. */ if (task.endsWith(":") || task.startsWith(":")) { //invalid range. e.g :20, 6: are not supported. LOG.warn("Partial range is considered as an invalid option"); return null; } Matcher taskMatcher = RANGE_REGEX.matcher(task); if (taskMatcher.find()) { int start = Integer.parseInt((taskMatcher.group(1).trim())); int end = Integer.parseInt((taskMatcher.group(2).trim())); for (int i = Math.min(start, end); i <= Math.max(start, end); i++) { profiledTaskSet.set(i); } } else { profiledTaskSet.set(Integer.parseInt(task.trim())); } } return profiledTaskSet; }
From source file:org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory.java
/**
 * Tries to replace a function call by a simpler, equivalent expression.
 *
 * <p>The following shapes are handled, keyed on the type of {@code udf}:
 * <ul>
 *   <li>{@code =} where one operand is a CASE/WHEN expression: the comparison is pushed into each
 *       result branch of that CASE/WHEN and the CASE/WHEN expression is re-created;</li>
 *   <li>{@code AND}: constant TRUE operands are dropped, any other constant operand is returned
 *       as the result, and {@code col IS NOT NULL} operands are dropped when another operand is a
 *       comparison (other than {@code <>}) on the same column;</li>
 *   <li>{@code OR}: constant FALSE operands are dropped and a constant TRUE operand is returned as
 *       the result;</li>
 *   <li>WHEN / CASE with one branch and an optional ELSE: folded when the condition or both
 *       results are constants;</li>
 *   <li>{@code unix_timestamp(args)} with at least one argument: rewritten to
 *       {@code to_unix_timestamp(args)}.</li>
 * </ul>
 *
 * <p>Note that {@code newExprs} (for AND/OR) and the children of a CASE/WHEN operand (for
 * {@code =}) are modified in place.
 *
 * @param udf the function being evaluated
 * @param newExprs the (already folded) arguments of the function
 * @param op the operator the expression belongs to; folds that treat NULL like FALSE are only
 *     applied when it is a {@code FilterOperator}
 * @return the simplified expression, or {@code null} when no shortcut applies
 * @throws UDFArgumentException declared by the signature; presumably raised by
 *     {@code ExprNodeGenericFuncDesc.newInstance} - TODO confirm
 */
private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc> newExprs,
        Operator<? extends Serializable> op) throws UDFArgumentException {
    if (udf instanceof GenericUDFOPEqual) {
        assert newExprs.size() == 2;
        // Locate the CASE/WHEN operand: look at the first operand, then at the second.
        boolean foundUDFInFirst = false;
        ExprNodeGenericFuncDesc caseOrWhenexpr = null;
        if (newExprs.get(0) instanceof ExprNodeGenericFuncDesc) {
            caseOrWhenexpr = (ExprNodeGenericFuncDesc) newExprs.get(0);
            if (caseOrWhenexpr.getGenericUDF() instanceof GenericUDFWhen
                    || caseOrWhenexpr.getGenericUDF() instanceof GenericUDFCase) {
                foundUDFInFirst = true;
            }
        }
        if (!foundUDFInFirst && newExprs.get(1) instanceof ExprNodeGenericFuncDesc) {
            caseOrWhenexpr = (ExprNodeGenericFuncDesc) newExprs.get(1);
            if (!(caseOrWhenexpr.getGenericUDF() instanceof GenericUDFWhen
                    || caseOrWhenexpr.getGenericUDF() instanceof GenericUDFCase)) {
                return null;
            }
        }
        if (null == caseOrWhenexpr) {
            // we didn't find case or when udf
            return null;
        }
        GenericUDF childUDF = caseOrWhenexpr.getGenericUDF();
        List<ExprNodeDesc> children = caseOrWhenexpr.getChildren();
        int i;
        if (childUDF instanceof GenericUDFWhen) {
            // WHEN children are [cond, result, cond, result, ..., (else)]: results sit at odd indexes.
            for (i = 1; i < children.size(); i += 2) {
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(),
                        Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            // An odd child count means a trailing ELSE result, which is rewritten as well.
            if (children.size() % 2 == 1) {
                i = children.size() - 1;
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(),
                        Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            // after constant folding of child expression the return type of UDFWhen might have changed,
            // so recreate the expression
            ExprNodeGenericFuncDesc newCaseOrWhenExpr = ExprNodeGenericFuncDesc.newInstance(childUDF,
                    caseOrWhenexpr.getFuncText(), children);
            return newCaseOrWhenExpr;
        } else if (childUDF instanceof GenericUDFCase) {
            // CASE children are [subject, value, result, value, result, ..., (else)]:
            // results sit at even indexes starting from 2.
            for (i = 2; i < children.size(); i += 2) {
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(),
                        Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            // An even child count means a trailing ELSE result, which is rewritten as well.
            if (children.size() % 2 == 0) {
                i = children.size() - 1;
                children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(),
                        Lists.newArrayList(children.get(i), newExprs.get(foundUDFInFirst ? 1 : 0))));
            }
            // after constant folding of child expression the return type of UDFCase might have changed,
            // so recreate the expression
            ExprNodeGenericFuncDesc newCaseOrWhenExpr = ExprNodeGenericFuncDesc.newInstance(childUDF,
                    caseOrWhenexpr.getFuncText(), children);
            return newCaseOrWhenExpr;
        } else {
            // Reached when the first operand is a function call that is neither CASE nor WHEN
            // and the second operand is not a function call at all.
            return null;
        }
    }

    if (udf instanceof GenericUDFOPAnd) {
        final BitSet positionsToRemove = new BitSet();
        final List<ExprNodeDesc> notNullExprs = new ArrayList<ExprNodeDesc>();
        final List<Integer> notNullExprsPositions = new ArrayList<Integer>();
        final List<ExprNodeDesc> compareExprs = new ArrayList<ExprNodeDesc>();
        for (int i = 0; i < newExprs.size(); i++) {
            ExprNodeDesc childExpr = newExprs.get(i);
            if (childExpr instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr;
                if (Boolean.TRUE.equals(c.getValue())) {
                    // if true, prune it
                    positionsToRemove.set(i);
                } else {
                    // any constant other than TRUE (false, or a null value) decides the result
                    return childExpr;
                }
            } else if (childExpr instanceof ExprNodeGenericFuncDesc
                    && ((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFOPNotNull
                    && childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc) {
                // (col is not null): remember the column and the operand position
                notNullExprs.add(childExpr.getChildren().get(0));
                notNullExprsPositions.add(i);
            } else if (childExpr instanceof ExprNodeGenericFuncDesc
                    && ((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFBaseCompare
                    && !(((ExprNodeGenericFuncDesc) childExpr).getGenericUDF() instanceof GenericUDFOPNotEqual)
                    && childExpr.getChildren().size() == 2) {
                // Try to fold (key <op> 86) and (key is not null) to (key <op> 86)
                // where <op> can be "=", ">=", "<=", ">", "<".
                // Note: (key <> 86) and (key is not null) cannot be folded
                ExprNodeColumnDesc colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(0));
                if (null == colDesc) {
                    colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(1));
                }
                if (colDesc != null) {
                    compareExprs.add(colDesc);
                }
            }
        }
        // Try to fold (key = 86) and (key is not null) to (key = 86)
        for (int i = 0; i < notNullExprs.size(); i++) {
            for (ExprNodeDesc other : compareExprs) {
                if (notNullExprs.get(i).isSame(other)) {
                    positionsToRemove.set(notNullExprsPositions.get(i));
                    break;
                }
            }
        }
        // Remove unnecessary expressions. Bits are original positions, so each index is shifted
        // left by the number of elements already removed.
        int pos = 0;
        int removed = 0;
        while ((pos = positionsToRemove.nextSetBit(pos)) != -1) {
            newExprs.remove(pos - removed);
            pos++;
            removed++;
        }
        if (newExprs.size() == 0) {
            // every operand was TRUE or redundant
            return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE);
        }
        if (newExprs.size() == 1) {
            return newExprs.get(0);
        }
    }

    if (udf instanceof GenericUDFOPOr) {
        final BitSet positionsToRemove = new BitSet();
        for (int i = 0; i < newExprs.size(); i++) {
            ExprNodeDesc childExpr = newExprs.get(i);
            if (childExpr instanceof ExprNodeConstantDesc) {
                ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr;
                if (Boolean.FALSE.equals(c.getValue())) {
                    // if false, prune it
                    positionsToRemove.set(i);
                } else if (Boolean.TRUE.equals(c.getValue())) {
                    // if true return true
                    return childExpr;
                }
            }
        }
        // Same position-shifting removal as in the AND case.
        int pos = 0;
        int removed = 0;
        while ((pos = positionsToRemove.nextSetBit(pos)) != -1) {
            newExprs.remove(pos - removed);
            pos++;
            removed++;
        }
        if (newExprs.size() == 0) {
            // every operand was FALSE
            return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.FALSE);
        }
        if (newExprs.size() == 1) {
            return newExprs.get(0);
        }
    }

    if (udf instanceof GenericUDFWhen) {
        if (!(newExprs.size() == 2 || newExprs.size() == 3)) {
            // In general, when can have unlimited # of branches,
            // we currently only handle either 1 or 2 branch.
            return null;
        }
        ExprNodeDesc thenExpr = newExprs.get(1);
        // A missing ELSE is modelled as a null constant of the THEN type.
        ExprNodeDesc elseExpr = newExprs.size() == 3 ? newExprs.get(2)
                : new ExprNodeConstantDesc(newExprs.get(1).getTypeInfo(), null);

        ExprNodeDesc whenExpr = newExprs.get(0);
        if (whenExpr instanceof ExprNodeConstantDesc) {
            // Constant condition: pick the branch directly (a null condition selects ELSE).
            Boolean whenVal = (Boolean) ((ExprNodeConstantDesc) whenExpr).getValue();
            return (whenVal == null || Boolean.FALSE.equals(whenVal)) ? elseExpr : thenExpr;
        }

        if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) {
            ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) thenExpr;
            ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) elseExpr;
            Object thenVal = constThen.getValue();
            Object elseVal = constElse.getValue();
            if (thenVal == null) {
                if (elseVal == null) {
                    // both branches are null.
                    return thenExpr;
                } else if (op instanceof FilterOperator) {
                    // we can still fold, since here null is equivalent to false.
                    return Boolean.TRUE.equals(elseVal)
                            ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), newExprs.subList(0, 1))
                            : Boolean.FALSE.equals(elseVal) ? elseExpr : null;
                } else {
                    // can't do much, expression is not in context of filter, so we can't treat null as
                    // equivalent to false here.
                    return null;
                }
            } else if (elseVal == null && op instanceof FilterOperator) {
                return Boolean.TRUE.equals(thenVal) ? whenExpr
                        : Boolean.FALSE.equals(thenVal) ? thenExpr : null;
            } else if (thenVal.equals(elseVal)) {
                // both branches yield the same value
                return thenExpr;
            } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
                // Distinct boolean results: build nvl(cond, false), negated when THEN is false.
                List<ExprNodeDesc> children = new ArrayList<>();
                children.add(whenExpr);
                children.add(new ExprNodeConstantDesc(false));
                ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(), children);
                if (Boolean.TRUE.equals(thenVal)) {
                    return func;
                } else {
                    List<ExprNodeDesc> exprs = new ArrayList<>();
                    exprs.add(func);
                    return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs);
                }
            } else {
                return null;
            }
        }
    }

    if (udf instanceof GenericUDFCase) {
        // HIVE-9644 Attempt to fold expression like :
        // where (case ss_sold_date when '1998-01-01' then 1=1 else null=1 end);
        // where ss_sold_date= '1998-01-01' ;
        if (!(newExprs.size() == 3 || newExprs.size() == 4)) {
            // In general case can have unlimited # of branches,
            // we currently only handle either 1 or 2 branch.
            return null;
        }
        ExprNodeDesc thenExpr = newExprs.get(2);
        // A missing ELSE is modelled as a null constant of the THEN type.
        ExprNodeDesc elseExpr = newExprs.size() == 4 ? newExprs.get(3)
                : new ExprNodeConstantDesc(newExprs.get(2).getTypeInfo(), null);

        if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) {
            ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) thenExpr;
            ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) elseExpr;
            Object thenVal = constThen.getValue();
            Object elseVal = constElse.getValue();
            if (thenVal == null) {
                if (null == elseVal) {
                    return thenExpr;
                } else if (op instanceof FilterOperator) {
                    // newExprs.subList(0, 2) is [subject, value]
                    return Boolean.TRUE.equals(elseVal)
                            ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNotEqual(), newExprs.subList(0, 2))
                            : Boolean.FALSE.equals(elseVal) ? elseExpr : null;
                } else {
                    return null;
                }
            } else if (null == elseVal && op instanceof FilterOperator) {
                return Boolean.TRUE.equals(thenVal)
                        ? ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), newExprs.subList(0, 2))
                        : Boolean.FALSE.equals(thenVal) ? thenExpr : null;
            } else if (thenVal.equals(elseVal)) {
                return thenExpr;
            } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
                // Distinct boolean results: build nvl(subject = value, false), negated when THEN is false.
                ExprNodeGenericFuncDesc equal = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(),
                        newExprs.subList(0, 2));
                List<ExprNodeDesc> children = new ArrayList<>();
                children.add(equal);
                children.add(new ExprNodeConstantDesc(false));
                ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFNvl(), children);
                if (Boolean.TRUE.equals(thenVal)) {
                    return func;
                } else {
                    List<ExprNodeDesc> exprs = new ArrayList<>();
                    exprs.add(func);
                    return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs);
                }
            } else {
                return null;
            }
        }
    }

    if (udf instanceof GenericUDFUnixTimeStamp) {
        if (newExprs.size() >= 1) {
            // unix_timestamp(args) -> to_unix_timestamp(args)
            return ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), newExprs);
        }
    }

    return null;
}
From source file:org.springframework.kafka.listener.KafkaMessageListenerContainerTests.java
/**
 * Runs a container with a listener that sleeps one second per record and acknowledges manually,
 * then checks the outcome.
 *
 * <p>Six records are sent in two batches separated by a 300 ms pause. The test asserts that the
 * listener handled six records with distinct {@code partition * 3 + offset} values, that
 * {@code commitSync} was invoked at least seven times, and that the consumer was paused and
 * resumed at least once.
 *
 * @param ackMode the acknowledgement mode configured on the container
 * @param topic the topic to listen on and send to
 * @throws Exception if waiting is interrupted or the container set-up fails
 */
private void testSlowListenerManualGuts(AckMode ackMode, String topic) throws Exception {
    logger.info("Start " + this.testName.getMethodName() + ackMode);

    Map<String, Object> consumerConfig = KafkaTestUtils.consumerProps("slow2", "false", embeddedKafka);
    DefaultKafkaConsumerFactory<Integer, String> consumerFactory = new DefaultKafkaConsumerFactory<>(consumerConfig);

    final CountDownLatch handledLatch = new CountDownLatch(6);
    final BitSet seenRecords = new BitSet(4);
    AcknowledgingMessageListener<Integer, String> slowListener = (record, acknowledgment) -> {
        logger.info("slow2: " + record);
        int slot = (int) (record.partition() * 3 + record.offset());
        seenRecords.set(slot);
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        acknowledgment.acknowledge();
        handledLatch.countDown();
    };

    ContainerProperties containerProps = new ContainerProperties(topic);
    containerProps.setSyncCommits(true);
    containerProps.setMessageListener(slowListener);
    containerProps.setPauseAfter(100);
    containerProps.setAckMode(ackMode);

    KafkaMessageListenerContainer<Integer, String> container =
            new KafkaMessageListenerContainer<>(consumerFactory, containerProps);
    container.setBeanName("testSlow2");
    container.start();

    // Count every commitSync call while still delegating to the real consumer.
    Consumer<?, ?> consumer = spyOnConsumer(container);
    final CountDownLatch commitLatch = new CountDownLatch(7);
    willAnswer(invocation -> {
        try {
            return invocation.callRealMethod();
        } finally {
            commitLatch.countDown();
        }
    }).given(consumer).commitSync(any());

    ContainerTestUtils.waitForAssignment(container, embeddedKafka.getPartitionsPerTopic());

    Map<String, Object> producerConfig = KafkaTestUtils.producerProps(embeddedKafka);
    ProducerFactory<Integer, String> producerFactory = new DefaultKafkaProducerFactory<>(producerConfig);
    KafkaTemplate<Integer, String> template = new KafkaTemplate<>(producerFactory);
    template.setDefaultTopic(topic);

    // First batch.
    template.sendDefault(0, "foo");
    template.sendDefault(2, "bar");
    template.sendDefault(0, "baz");
    template.sendDefault(2, "qux");
    template.flush();

    Thread.sleep(300);

    // Second batch.
    template.sendDefault(0, "fiz");
    template.sendDefault(2, "buz");
    template.flush();

    assertThat(handledLatch.await(60, TimeUnit.SECONDS)).isTrue();
    assertThat(commitLatch.await(60, TimeUnit.SECONDS)).isTrue();
    assertThat(seenRecords.cardinality()).isEqualTo(6);
    verify(consumer, atLeastOnce()).pause(anyObject());
    verify(consumer, atLeastOnce()).resume(anyObject());

    container.stop();
    logger.info("Stop " + this.testName.getMethodName() + ackMode);
}
From source file:com.google.uzaygezen.core.BitVectorTest.java
private void checkCopyFrom(Function<Integer, BitVector> factory) { int size = 10; BitVector bv = factory.apply(size);//from w w w.ja va2s .com for (long i = 1 << size; --i >= 0;) { bv.copyFrom(i); Assert.assertEquals(i, bv.toLong()); } final int bigSize = 1000; BitSet bs = new BitSet(); for (int i = 0; i < bigSize; i += 3) { bs.set(i); } bv = factory.apply(bigSize); bv.copyFrom(bs); for (int i = 0; i < bigSize; ++i) { Assert.assertEquals(i % 3 == 0, bv.get(i)); } }
From source file:org.apache.pig.backend.local.executionengine.LocalPigLauncher.java
private int runPipeline(POStore[] leaves, PigContext pc) throws IOException, ExecException { BitSet bs = new BitSet(leaves.length); int failed = 0; while (true) { if (bs.cardinality() == leaves.length) { break; }/* w w w . j a v a2 s .c o m*/ for (int i = bs.nextClearBit(0); i < leaves.length; i = bs.nextClearBit(i + 1)) { Result res = leaves[i].getNext(DUMMYTUPLE); switch (res.returnStatus) { case POStatus.STATUS_NULL: // good null from store means keep at it. continue; case POStatus.STATUS_OK: // ok shouldn't happen store should have consumed it. // fallthrough case POStatus.STATUS_ERR: leaves[i].cleanUp(); leaves[i].tearDown(); failed++; failedStores.add(leaves[i].getSFile()); if ("true".equalsIgnoreCase(pc.getProperties().getProperty("stop.on.failure", "false"))) { int errCode = 6017; String msg = "Execution failed, while processing " + leaves[i].getSFile().getFileName(); throw new ExecException(msg, errCode, PigException.REMOTE_ENVIRONMENT); } bs.set(i); break; case POStatus.STATUS_EOP: leaves[i].tearDown(); succeededStores.add(leaves[i].getSFile()); // fallthrough default: bs.set(i); break; } } } return failed; }
From source file:hivemall.smile.classification.GradientTreeBoostingClassifierUDTF.java
private void train2(@Nonnull final double[][] x, @Nonnull final int[] y) throws HiveException { final int numVars = SmileExtUtils.computeNumInputVars(_numVars, x); if (logger.isInfoEnabled()) { logger.info("k: " + 2 + ", numTrees: " + _numTrees + ", shirinkage: " + _eta + ", subsample: " + _subsample + ", numVars: " + numVars + ", maxDepth: " + _maxDepth + ", minSamplesSplit: " + _minSamplesSplit + ", maxLeafs: " + _maxLeafNodes + ", seed: " + _seed); }/*w ww .ja va 2 s. com*/ final int numInstances = x.length; final int numSamples = (int) Math.round(numInstances * _subsample); final double[] h = new double[numInstances]; // current F(x_i) final double[] response = new double[numInstances]; // response variable for regression tree. final double mu = smile.math.Math.mean(y); final double intercept = 0.5d * Math.log((1.d + mu) / (1.d - mu)); for (int i = 0; i < numInstances; i++) { h[i] = intercept; } final int[][] order = SmileExtUtils.sort(_attributes, x); final RegressionTree.NodeOutput output = new L2NodeOutput(response); final BitSet sampled = new BitSet(numInstances); final int[] bag = new int[numSamples]; final int[] perm = new int[numSamples]; for (int i = 0; i < numSamples; i++) { perm[i] = i; } long s = (this._seed == -1L) ? 
SmileExtUtils.generateSeed() : new smile.math.Random(_seed).nextLong(); final smile.math.Random rnd1 = new smile.math.Random(s); final smile.math.Random rnd2 = new smile.math.Random(rnd1.nextLong()); for (int m = 0; m < _numTrees; m++) { reportProgress(_progressReporter); SmileExtUtils.shuffle(perm, rnd1); for (int i = 0; i < numSamples; i++) { int index = perm[i]; bag[i] = index; sampled.set(index); } for (int i = 0; i < numInstances; i++) { response[i] = 2.0d * y[i] / (1.d + Math.exp(2.d * y[i] * h[i])); } RegressionTree tree = new RegressionTree(_attributes, x, response, numVars, _maxDepth, _maxLeafNodes, _minSamplesSplit, _minSamplesLeaf, order, bag, output, rnd2); for (int i = 0; i < numInstances; i++) { h[i] += _eta * tree.predict(x[i]); } // out-of-bag error estimate int oobTests = 0, oobErrors = 0; for (int i = sampled.nextClearBit(0); i < numInstances; i = sampled.nextClearBit(i + 1)) { oobTests++; final int pred = (h[i] > 0.d) ? 1 : 0; if (pred != y[i]) { oobErrors++; } } float oobErrorRate = 0.f; if (oobTests > 0) { oobErrorRate = ((float) oobErrors) / oobTests; } forward(m + 1, intercept, _eta, oobErrorRate, tree); sampled.clear(); } }
From source file:org.apache.poi.ss.format.CellNumberFormatter.java
/** {@inheritDoc} */ public void formatValue(StringBuffer toAppendTo, Object valueObject) { double value = ((Number) valueObject).doubleValue(); value *= scale;//from w w w. j a va 2 s . co m // the '-' sign goes at the front, always, so we pick it out boolean negative = value < 0; if (negative) value = -value; // Split out the fractional part if we need to print a fraction double fractional = 0; if (slash != null) { if (improperFraction) { fractional = value; value = 0; } else { fractional = value % 1.0; //noinspection SillyAssignment value = (long) value; } } Set<StringMod> mods = new TreeSet<>(); StringBuffer output = new StringBuffer(desc); if (exponent != null) { writeScientific(value, output, mods); } else if (improperFraction) { writeFraction(value, null, fractional, output, mods); } else { StringBuffer result = new StringBuffer(); Formatter f = new Formatter(result); f.format(LOCALE, printfFmt, value); if (numerator == null) { writeFractional(result, output); writeInteger(result, output, integerSpecials, mods, integerCommas); } else { writeFraction(value, result, fractional, output, mods); } } // Now strip out any remaining '#'s and add any pending text ... ListIterator<Special> it = specials.listIterator(); Iterator<StringMod> changes = mods.iterator(); StringMod nextChange = (changes.hasNext() ? 
changes.next() : null); int adjust = 0; BitSet deletedChars = new BitSet(); // records chars already deleted while (it.hasNext()) { Special s = it.next(); int adjustedPos = s.pos + adjust; if (!deletedChars.get(s.pos) && output.charAt(adjustedPos) == '#') { output.deleteCharAt(adjustedPos); adjust--; deletedChars.set(s.pos); } while (nextChange != null && s == nextChange.special) { int lenBefore = output.length(); int modPos = s.pos + adjust; int posTweak = 0; switch (nextChange.op) { case StringMod.AFTER: // ignore adding a comma after a deleted char (which was a '#') if (nextChange.toAdd.equals(",") && deletedChars.get(s.pos)) break; posTweak = 1; //noinspection fallthrough case StringMod.BEFORE: output.insert(modPos + posTweak, nextChange.toAdd); break; case StringMod.REPLACE: int delPos = s.pos; // delete starting pos in original coordinates if (!nextChange.startInclusive) { delPos++; modPos++; } // Skip over anything already deleted while (deletedChars.get(delPos)) { delPos++; modPos++; } int delEndPos = nextChange.end.pos; // delete end point in original if (nextChange.endInclusive) delEndPos++; int modEndPos = delEndPos + adjust; // delete end point in current if (modPos < modEndPos) { if (nextChange.toAdd == "") output.delete(modPos, modEndPos); else { char fillCh = nextChange.toAdd.charAt(0); for (int i = modPos; i < modEndPos; i++) output.setCharAt(i, fillCh); } deletedChars.set(delPos, delEndPos); } break; default: throw new IllegalStateException("Unknown op: " + nextChange.op); } adjust += output.length() - lenBefore; if (changes.hasNext()) nextChange = changes.next(); else nextChange = null; } } // Finally, add it to the string if (negative) toAppendTo.append('-'); toAppendTo.append(output); }
From source file:com.netease.news.utils.SplitInput.java
/** * Perform a split on the specified input file. Results will be written to files of the same name in the specified * training and test output directories. The {@link #validate()} method is called prior to executing the split. *//*from ww w .ja va 2s . c o m*/ public void splitFile(Path inputFile) throws IOException { Configuration conf = getConf(); FileSystem fs = inputFile.getFileSystem(conf); if (fs.getFileStatus(inputFile) == null) { throw new IOException(inputFile + " does not exist"); } if (fs.getFileStatus(inputFile).isDir()) { throw new IOException(inputFile + " is a directory"); } validate(); Path testOutputFile = new Path(testOutputDirectory, inputFile.getName()); Path trainingOutputFile = new Path(trainingOutputDirectory, inputFile.getName()); int lineCount = countLines(fs, inputFile, charset); log.info("{} has {} lines", inputFile.getName(), lineCount); int testSplitStart = 0; int testSplitSize = this.testSplitSize; // don't modify state BitSet randomSel = null; if (testRandomSelectionPct > 0 || testRandomSelectionSize > 0) { testSplitSize = this.testRandomSelectionSize; if (testRandomSelectionPct > 0) { testSplitSize = Math.round(lineCount * testRandomSelectionPct / 100.0f); } log.info("{} test split size is {} based on random selection percentage {}", inputFile.getName(), testSplitSize, testRandomSelectionPct); long[] ridx = new long[testSplitSize]; RandomSampler.sample(testSplitSize, lineCount - 1, testSplitSize, 0, ridx, 0, RandomUtils.getRandom()); randomSel = new BitSet(lineCount); for (long idx : ridx) { randomSel.set((int) idx + 1); } } else { if (testSplitPct > 0) { // calculate split size based on percentage testSplitSize = Math.round(lineCount * testSplitPct / 100.0f); log.info("{} test split size is {} based on percentage {}", inputFile.getName(), testSplitSize, testSplitPct); } else { log.info("{} test split size is {}", inputFile.getName(), testSplitSize); } if (splitLocation > 0) { // calculate start of split based on percentage 
testSplitStart = Math.round(lineCount * splitLocation / 100.0f); if (lineCount - testSplitStart < testSplitSize) { // adjust split start downwards based on split size. testSplitStart = lineCount - testSplitSize; } log.info("{} test split start is {} based on split location {}", inputFile.getName(), testSplitStart, splitLocation); } if (testSplitStart < 0) { throw new IllegalArgumentException( "test split size for " + inputFile + " is too large, it would produce an " + "empty training set from the initial set of " + lineCount + " examples"); } else if (lineCount - testSplitSize < testSplitSize) { log.warn( "Test set size for {} may be too large, {} is larger than the number of " + "lines remaining in the training set: {}", inputFile, testSplitSize, lineCount - testSplitSize); } } int trainCount = 0; int testCount = 0; if (!useSequence) { BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset)); Writer trainingWriter = new OutputStreamWriter(fs.create(trainingOutputFile), charset); Writer testWriter = new OutputStreamWriter(fs.create(testOutputFile), charset); try { String line; int pos = 0; while ((line = reader.readLine()) != null) { pos++; Writer writer; if (testRandomSelectionPct > 0) { // Randomly choose writer = randomSel.get(pos) ? testWriter : trainingWriter; } else { // Choose based on location writer = pos > testSplitStart ? 
testWriter : trainingWriter; } if (writer == testWriter) { if (testCount >= testSplitSize) { writer = trainingWriter; } else { testCount++; } } if (writer == trainingWriter) { trainCount++; } writer.write(line); writer.write('\n'); } } finally { Closeables.close(reader, true); Closeables.close(trainingWriter, false); Closeables.close(testWriter, false); } } else { SequenceFileIterator<Writable, Writable> iterator = new SequenceFileIterator<Writable, Writable>( inputFile, false, fs.getConf()); SequenceFile.Writer trainingWriter = SequenceFile.createWriter(fs, fs.getConf(), trainingOutputFile, iterator.getKeyClass(), iterator.getValueClass()); SequenceFile.Writer testWriter = SequenceFile.createWriter(fs, fs.getConf(), testOutputFile, iterator.getKeyClass(), iterator.getValueClass()); try { int pos = 0; while (iterator.hasNext()) { pos++; SequenceFile.Writer writer; if (testRandomSelectionPct > 0) { // Randomly choose writer = randomSel.get(pos) ? testWriter : trainingWriter; } else { // Choose based on location writer = pos > testSplitStart ? testWriter : trainingWriter; } if (writer == testWriter) { if (testCount >= testSplitSize) { writer = trainingWriter; } else { testCount++; } } if (writer == trainingWriter) { trainCount++; } Pair<Writable, Writable> pair = iterator.next(); writer.append(pair.getFirst(), pair.getSecond()); } } finally { Closeables.close(iterator, true); Closeables.close(trainingWriter, false); Closeables.close(testWriter, false); } } log.info("file: {}, input: {} train: {}, test: {} starting at {}", inputFile.getName(), lineCount, trainCount, testCount, testSplitStart); // testing; if (callback != null) { callback.splitComplete(inputFile, lineCount, trainCount, testCount, testSplitStart); } }