List of usage examples for java.util Random nextDouble
public double nextDouble()
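nextDouble() returns the next pseudorandom, uniformly distributed double value in [0.0, 1.0) from this random number generator's sequence. Before the real-world usages below, here is a minimal, self-contained sketch (class and variable names are illustrative) of the two idioms that recur throughout them: scaling a draw into a range, and using a draw as a probability gate.

import java.util.Random;

public class NextDoubleBasics {
    public static void main(String[] args) {
        Random random = new Random(42L); // fixed seed for a reproducible run
        double u = random.nextDouble();                // uniform in [0.0, 1.0)
        double scaled = 5.0 + u * 10.0;                // uniform in [5.0, 15.0)
        boolean rareEvent = random.nextDouble() < 0.1; // true ~10% of the time
        System.out.println(u + " " + scaled + " " + rareEvent);
    }
}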
From source file:org.apache.hadoop.hive.ql.exec.Utilities.java
/**
 * Introduce a random factor into the wait time before another retry.
 * The wait time depends on the number of failures so far and a random factor.
 * When the first exception occurs, the wait time is a random number between
 * 0..baseWindow msec. If the first retry still fails, we wait a baseWindow msec
 * grace period before the 2nd retry. At the second retry, the waiting window is
 * expanded to 2*baseWindow msec to relieve the request rate on the server.
 * Similarly, the 3rd retry waits a 2*baseWindow msec grace period before the
 * retry, and the waiting window is expanded to 3*baseWindow msec, and so on.
 *
 * @param baseWindow the base waiting window.
 * @param failures number of failures so far.
 * @param r a random generator.
 * @return number of milliseconds for the next wait time.
 */
public static long getRandomWaitTime(long baseWindow, int failures, Random r) {
    return (long) (baseWindow * failures + // grace period for the last round of attempt
            baseWindow * (failures + 1) * r.nextDouble()); // expanding time window for each failure
}
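A minimal sketch of how this helper might be exercised, assuming getRandomWaitTime above is declared in the same class. Per the formula, for n failures the result falls in [baseWindow * n, baseWindow * (2n + 1)).

import java.util.Random;

public class BackoffDemo {
    public static void main(String[] args) {
        Random r = new Random();
        // For failures = n with baseWindow = 1000, the wait lies in
        // [1000 * n, 1000 * n + 1000 * (n + 1)) msec.
        for (int failures = 0; failures < 4; failures++) {
            long wait = getRandomWaitTime(1000L, failures, r);
            System.out.println("failures=" + failures + " -> wait=" + wait + "ms");
        }
    }
}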
From source file:ml.shifu.shifu.core.dtrain.dt.DTWorker.java
@Override
public DTWorkerParams doCompute(WorkerContext<DTMasterParams, DTWorkerParams> context) {
    if (context.isFirstIteration()) {
        return new DTWorkerParams();
    }

    DTMasterParams lastMasterResult = context.getLastMasterResult();
    final List<TreeNode> trees = lastMasterResult.getTrees();
    final Map<Integer, TreeNode> todoNodes = lastMasterResult.getTodoNodes();
    if (todoNodes == null) {
        return new DTWorkerParams();
    }
    LOG.info("Start to work: todoNodes size is {}", todoNodes.size());

    Map<Integer, NodeStats> statistics = initTodoNodeStats(todoNodes);

    double trainError = 0d, validationError = 0d;
    double weightedTrainCount = 0d, weightedValidationCount = 0d;

    // renew random seed
    if (this.isGBDT && !this.gbdtSampleWithReplacement && lastMasterResult.isSwitchToNextTree()) {
        this.baggingRandomMap = new HashMap<Integer, Random>();
    }

    long start = System.nanoTime();
    for (Data data : this.trainingData) {
        if (this.isRF) {
            for (TreeNode treeNode : trees) {
                if (treeNode.getNode().getId() == Node.INVALID_INDEX) {
                    continue;
                }
                Node predictNode = predictNodeIndex(treeNode.getNode(), data, true);
                if (predictNode.getPredict() != null) {
                    // only update when not in first node, for treeNode, no predict statistics at that time
                    float weight = data.subsampleWeights[treeNode.getTreeId()];
                    if (Float.compare(weight, 0f) == 0) {
                        // oob data, no need to do weighting
                        validationError += data.significance
                                * loss.computeError((float) (predictNode.getPredict().getPredict()), data.label);
                        weightedValidationCount += data.significance;
                    } else {
                        trainError += weight * data.significance
                                * loss.computeError((float) (predictNode.getPredict().getPredict()), data.label);
                        weightedTrainCount += weight * data.significance;
                    }
                }
            }
        }

        if (this.isGBDT) {
            if (this.isContinuousEnabled && lastMasterResult.isContinuousRunningStart()) {
                recoverGBTData(context, data.output, data.predict, data, false);
                trainError += data.significance * loss.computeError(data.predict, data.label);
                weightedTrainCount += data.significance;
            } else {
                if (isNeedRecoverGBDTPredict) {
                    if (this.recoverTrees == null) {
                        this.recoverTrees = recoverCurrentTrees();
                    }
                    // recover gbdt data for fail over
                    recoverGBTData(context, data.output, data.predict, data, true);
                }
                int currTreeIndex = trees.size() - 1;
                if (lastMasterResult.isSwitchToNextTree()) {
                    if (currTreeIndex >= 1) {
                        Node node = trees.get(currTreeIndex - 1).getNode();
                        Node predictNode = predictNodeIndex(node, data, false);
                        if (predictNode.getPredict() != null) {
                            double predict = predictNode.getPredict().getPredict();
                            // first tree logic, master must set it to first tree even second tree with ROOT is
                            // sending
                            if (context.getLastMasterResult().isFirstTree()) {
                                data.predict = (float) predict;
                            } else {
                                // random drop
                                boolean drop = (this.dropOutRate > 0.0
                                        && dropOutRandom.nextDouble() < this.dropOutRate);
                                if (!drop) {
                                    data.predict += (float) (this.learningRate * predict);
                                }
                            }
                            data.output = -1f * loss.computeGradient(data.predict, data.label);
                        }
                        // if not sampling with replacement in gbdt, renew bagging sample rate in next tree
                        if (!this.gbdtSampleWithReplacement) {
                            Random random = null;
                            int classValue = (int) (data.label + 0.01f);
                            if (this.isStratifiedSampling) {
                                random = baggingRandomMap.get(classValue);
                                if (random == null) {
                                    random = DTrainUtils.generateRandomBySampleSeed(
                                            modelConfig.getTrain().getBaggingSampleSeed(),
                                            CommonConstants.NOT_CONFIGURED_BAGGING_SEED);
                                    baggingRandomMap.put(classValue, random);
                                }
                            } else {
                                random = baggingRandomMap.get(0);
                                if (random == null) {
                                    random = DTrainUtils.generateRandomBySampleSeed(
                                            modelConfig.getTrain().getBaggingSampleSeed(),
                                            CommonConstants.NOT_CONFIGURED_BAGGING_SEED);
                                    baggingRandomMap.put(0, random);
                                }
                            }
                            if (random.nextDouble() <= modelConfig.getTrain().getBaggingSampleRate()) {
                                data.subsampleWeights[currTreeIndex % data.subsampleWeights.length] = 1f;
                            } else {
                                data.subsampleWeights[currTreeIndex % data.subsampleWeights.length] = 0f;
                            }
                        }
                    }
                }
                if (context.getLastMasterResult().isFirstTree() && !lastMasterResult.isSwitchToNextTree()) {
                    Node currTree = trees.get(currTreeIndex).getNode();
                    Node predictNode = predictNodeIndex(currTree, data, true);
                    if (predictNode.getPredict() != null) {
                        trainError += data.significance
                                * loss.computeError((float) (predictNode.getPredict().getPredict()), data.label);
                        weightedTrainCount += data.significance;
                    }
                } else {
                    trainError += data.significance * loss.computeError(data.predict, data.label);
                    weightedTrainCount += data.significance;
                }
            }
        }
    }
    LOG.debug("Compute train error time is {}ms", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start));

    if (validationData != null) {
        start = System.nanoTime();
        for (Data data : this.validationData) {
            if (this.isRF) {
                for (TreeNode treeNode : trees) {
                    if (treeNode.getNode().getId() == Node.INVALID_INDEX) {
                        continue;
                    }
                    Node predictNode = predictNodeIndex(treeNode.getNode(), data, true);
                    if (predictNode.getPredict() != null) {
                        // only update when not in first node, for treeNode, no predict statistics at that time
                        validationError += data.significance
                                * loss.computeError((float) (predictNode.getPredict().getPredict()), data.label);
                        weightedValidationCount += data.significance;
                    }
                }
            }

            if (this.isGBDT) {
                if (this.isContinuousEnabled && lastMasterResult.isContinuousRunningStart()) {
                    recoverGBTData(context, data.output, data.predict, data, false);
                    validationError += data.significance * loss.computeError(data.predict, data.label);
                    weightedValidationCount += data.significance;
                } else {
                    if (isNeedRecoverGBDTPredict) {
                        if (this.recoverTrees == null) {
                            this.recoverTrees = recoverCurrentTrees();
                        }
                        // recover gbdt data for fail over
                        recoverGBTData(context, data.output, data.predict, data, true);
                    }
                    int currTreeIndex = trees.size() - 1;
                    if (lastMasterResult.isSwitchToNextTree()) {
                        if (currTreeIndex >= 1) {
                            Node node = trees.get(currTreeIndex - 1).getNode();
                            Node predictNode = predictNodeIndex(node, data, false);
                            if (predictNode.getPredict() != null) {
                                double predict = predictNode.getPredict().getPredict();
                                if (context.getLastMasterResult().isFirstTree()) {
                                    data.predict = (float) predict;
                                } else {
                                    data.predict += (float) (this.learningRate * predict);
                                }
                                data.output = -1f * loss.computeGradient(data.predict, data.label);
                            }
                        }
                    }
                    if (context.getLastMasterResult().isFirstTree() && !lastMasterResult.isSwitchToNextTree()) {
                        Node predictNode = predictNodeIndex(trees.get(currTreeIndex).getNode(), data, true);
                        if (predictNode.getPredict() != null) {
                            validationError += data.significance
                                    * loss.computeError((float) (predictNode.getPredict().getPredict()),
                                            data.label);
                            weightedValidationCount += data.significance;
                        }
                    } else {
                        validationError += data.significance * loss.computeError(data.predict, data.label);
                        weightedValidationCount += data.significance;
                    }
                }
            }
        }
        LOG.debug("Compute val error time is {}ms", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start));
    }

    if (this.isGBDT) {
        // reset trees to null to save memory
        this.recoverTrees = null;
        if (this.isNeedRecoverGBDTPredict) {
            // no need recover again
            this.isNeedRecoverGBDTPredict = false;
        }
    }

    start = System.nanoTime();
    CompletionService<Map<Integer, NodeStats>> completionService =
            new ExecutorCompletionService<Map<Integer, NodeStats>>(this.threadPool);

    int realThreadCount = 0;
    LOG.debug("while todo size {}", todoNodes.size());

    int realRecords = this.trainingData.size();
    int realThreads = this.workerThreadCount > realRecords ? realRecords : this.workerThreadCount;

    int[] trainLows = new int[realThreads];
    int[] trainHighs = new int[realThreads];

    int stepCount = realRecords / realThreads;
    if (realRecords % realThreads != 0) {
        // move step count to append last gap to avoid last thread worse 2*stepCount-1
        stepCount += (realRecords % realThreads) / stepCount;
    }
    for (int i = 0; i < realThreads; i++) {
        trainLows[i] = i * stepCount;
        if (i != realThreads - 1) {
            trainHighs[i] = trainLows[i] + stepCount - 1;
        } else {
            trainHighs[i] = realRecords - 1;
        }
    }

    for (int i = 0; i < realThreads; i++) {
        final Map<Integer, TreeNode> localTodoNodes = new HashMap<Integer, TreeNode>(todoNodes);
        final Map<Integer, NodeStats> localStatistics = initTodoNodeStats(todoNodes);

        final int startIndex = trainLows[i];
        final int endIndex = trainHighs[i];
        LOG.info("Thread {} todo size {} stats size {} start index {} end index {}", i, localTodoNodes.size(),
                localStatistics.size(), startIndex, endIndex);

        if (localTodoNodes.size() == 0) {
            continue;
        }
        realThreadCount += 1;
        completionService.submit(new Callable<Map<Integer, NodeStats>>() {
            @Override
            public Map<Integer, NodeStats> call() throws Exception {
                long start = System.nanoTime();
                List<Integer> nodeIndexes = new ArrayList<Integer>(trees.size());
                for (int j = startIndex; j <= endIndex; j++) {
                    Data data = DTWorker.this.trainingData.get(j);
                    nodeIndexes.clear();
                    if (DTWorker.this.isRF) {
                        for (TreeNode treeNode : trees) {
                            if (treeNode.getNode().getId() == Node.INVALID_INDEX) {
                                nodeIndexes.add(Node.INVALID_INDEX);
                            } else {
                                Node predictNode = predictNodeIndex(treeNode.getNode(), data, false);
                                nodeIndexes.add(predictNode.getId());
                            }
                        }
                    }

                    if (DTWorker.this.isGBDT) {
                        int currTreeIndex = trees.size() - 1;
                        Node predictNode = predictNodeIndex(trees.get(currTreeIndex).getNode(), data, false);
                        // update node index
                        nodeIndexes.add(predictNode.getId());
                    }

                    for (Map.Entry<Integer, TreeNode> entry : localTodoNodes.entrySet()) {
                        // only do statistics on effective data
                        Node todoNode = entry.getValue().getNode();
                        int treeId = entry.getValue().getTreeId();
                        int currPredictIndex = 0;
                        if (DTWorker.this.isRF) {
                            currPredictIndex = nodeIndexes.get(entry.getValue().getTreeId());
                        }
                        if (DTWorker.this.isGBDT) {
                            currPredictIndex = nodeIndexes.get(0);
                        }

                        if (todoNode.getId() == currPredictIndex) {
                            List<Integer> features = entry.getValue().getFeatures();
                            if (features.isEmpty()) {
                                features = getAllValidFeatures();
                            }
                            for (Integer columnNum : features) {
                                double[] featuerStatistic = localStatistics.get(entry.getKey())
                                        .getFeatureStatistics().get(columnNum);
                                float weight = data.subsampleWeights[treeId % data.subsampleWeights.length];
                                if (Float.compare(weight, 0f) != 0) {
                                    // only compute weight is not 0
                                    short binIndex = data.inputs[DTWorker.this.inputIndexMap.get(columnNum)];
                                    DTWorker.this.impurity.featureUpdate(featuerStatistic, binIndex, data.output,
                                            data.significance, weight);
                                }
                            }
                        }
                    }
                }
                LOG.debug("Thread computing stats time is {}ms in thread {}",
                        TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start),
                        Thread.currentThread().getName());
                return localStatistics;
            }
        });
    }

    int rCnt = 0;
    while (rCnt < realThreadCount) {
        try {
            Map<Integer, NodeStats> currNodeStatsmap = completionService.take().get();
            if (rCnt == 0) {
                statistics = currNodeStatsmap;
            } else {
                for (Entry<Integer, NodeStats> entry : statistics.entrySet()) {
                    NodeStats resultNodeStats = entry.getValue();
                    mergeNodeStats(resultNodeStats, currNodeStatsmap.get(entry.getKey()));
                }
            }
        } catch (ExecutionException e) {
            throw new RuntimeException(e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        rCnt += 1;
    }
    LOG.debug("Compute stats time is {}ms", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start));

    LOG.info(
            "worker count is {}, error is {}, and stats size is {}. weightedTrainCount {}, weightedValidationCount {}, trainError {}, validationError {}",
            count, trainError, statistics.size(), weightedTrainCount, weightedValidationCount, trainError,
            validationError);

    return new DTWorkerParams(weightedTrainCount, weightedValidationCount, trainError, validationError,
            statistics);
}
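Two nextDouble() idioms appear in doCompute() above: a dropout gate that skips a prior tree's prediction with probability dropOutRate, and a bagging gate that sets a record's subsample weight to 1 or 0 with probability baggingSampleRate. A minimal, self-contained sketch of the dropout gate follows; all identifiers here are illustrative, not from Shifu.

import java.util.Random;

public class DropoutGateSketch {
    public static void main(String[] args) {
        double dropOutRate = 0.1;   // skip an earlier tree ~10% of the time
        double learningRate = 0.05;
        Random dropOutRandom = new Random();
        float prediction = 0f;
        double treePredict = 0.8;   // score from one earlier tree
        boolean drop = dropOutRate > 0.0 && dropOutRandom.nextDouble() < dropOutRate;
        if (!drop) {
            prediction += (float) (learningRate * treePredict);
        }
        System.out.println("prediction=" + prediction + " dropped=" + drop);
    }
}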
From source file:ml.shifu.shifu.core.dtrain.wdl.WDLWorker.java
/**
 * Add to training set or validation set according to validation rate.
 *
 * @param hashcode
 *            the hash code of the data
 * @param data
 *            data instance
 * @param attachment
 *            whether this record is flagged as validation data
 * @return true if the data is added to the training set, false otherwise.
 */
protected boolean addDataPairToDataSet(long hashcode, Data data, Object attachment) {
    // if validation data from configured validation data set
    boolean isValidation = (attachment != null && attachment instanceof Boolean) ? (Boolean) attachment : false;

    if (this.isKFoldCV) {
        int k = this.modelConfig.getTrain().getNumKFold();
        if (hashcode % k == this.trainerId) {
            this.validationData.append(data);
            if (isPositive(data.label)) {
                this.positiveValidationCount += 1L;
            } else {
                this.negativeValidationCount += 1L;
            }
            return false;
        } else {
            this.trainingData.append(data);
            if (isPositive(data.label)) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    }

    if (this.isManualValidation) {
        if (isValidation) {
            this.validationData.append(data);
            if (isPositive(data.label)) {
                this.positiveValidationCount += 1L;
            } else {
                this.negativeValidationCount += 1L;
            }
            return false;
        } else {
            this.trainingData.append(data);
            if (isPositive(data.label)) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    } else {
        if (Double.compare(this.modelConfig.getValidSetRate(), 0d) != 0) {
            int classValue = (int) (data.label + 0.01f);
            Random random = null;
            if (this.isStratifiedSampling) {
                // each class use one random instance
                random = validationRandomMap.get(classValue);
                if (random == null) {
                    random = new Random();
                    this.validationRandomMap.put(classValue, random);
                }
            } else {
                // all data use one random instance
                random = validationRandomMap.get(0);
                if (random == null) {
                    random = new Random();
                    this.validationRandomMap.put(0, random);
                }
            }

            if (this.modelConfig.isFixInitialInput()) {
                // for fix initial input, if hashcode%100 is in [start-hashcode, end-hashcode), validation,
                // otherwise training. start hashcode in different job is different to make sure bagging jobs have
                // different data. if end-hashcode is over 100, then check if hashcode is in [start-hashcode, 100]
                // or [0, end-hashcode]
                int startHashCode = (100 / this.modelConfig.getBaggingNum()) * this.trainerId;
                int endHashCode = startHashCode
                        + Double.valueOf(this.modelConfig.getValidSetRate() * 100).intValue();
                if (isInRange(hashcode, startHashCode, endHashCode)) {
                    this.validationData.append(data);
                    if (isPositive(data.label)) {
                        this.positiveValidationCount += 1L;
                    } else {
                        this.negativeValidationCount += 1L;
                    }
                    return false;
                } else {
                    this.trainingData.append(data);
                    if (isPositive(data.label)) {
                        this.positiveTrainCount += 1L;
                    } else {
                        this.negativeTrainCount += 1L;
                    }
                    return true;
                }
            } else {
                // not fixed initial input, if random value >= validRate, training, otherwise validation.
                if (random.nextDouble() >= this.modelConfig.getValidSetRate()) {
                    this.trainingData.append(data);
                    if (isPositive(data.label)) {
                        this.positiveTrainCount += 1L;
                    } else {
                        this.negativeTrainCount += 1L;
                    }
                    return true;
                } else {
                    this.validationData.append(data);
                    if (isPositive(data.label)) {
                        this.positiveValidationCount += 1L;
                    } else {
                        this.negativeValidationCount += 1L;
                    }
                    return false;
                }
            }
        } else {
            this.trainingData.append(data);
            if (isPositive(data.label)) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    }
}
From source file:ml.shifu.shifu.core.dtrain.lr.LogisticRegressionWorker.java
/**
 * Add to training set or validation set according to validation rate.
 *
 * @param hashcode
 *            the hash code of the data
 * @param data
 *            data instance
 * @param isValidation
 *            whether this record is flagged as validation data
 * @return true if the data is added to the training set, false otherwise.
 */
protected boolean addDataPairToDataSet(long hashcode, Data data, boolean isValidation) {
    if (this.isKFoldCV) {
        int k = this.modelConfig.getTrain().getNumKFold();
        if (hashcode % k == this.trainerId) {
            this.validationData.append(data);
            if (isPositive(data.outputs[0])) {
                this.positiveValidationCount += 1L;
            } else {
                this.negativeValidationCount += 1L;
            }
            return false;
        } else {
            this.trainingData.append(data);
            if (isPositive(data.outputs[0])) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    }

    if (this.isSpecificValidation) {
        if (isValidation) {
            this.validationData.append(data);
            if (isPositive(data.outputs[0])) {
                this.positiveValidationCount += 1L;
            } else {
                this.negativeValidationCount += 1L;
            }
            return false;
        } else {
            this.trainingData.append(data);
            if (isPositive(data.outputs[0])) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    } else {
        if (Double.compare(this.modelConfig.getValidSetRate(), 0d) != 0) {
            int classValue = (int) (data.outputs[0] + 0.01f);
            Random random = null;
            if (this.isStratifiedSampling) {
                // each class use one random instance
                random = validationRandomMap.get(classValue);
                if (random == null) {
                    random = new Random();
                    this.validationRandomMap.put(classValue, random);
                }
            } else {
                // all data use one random instance
                random = validationRandomMap.get(0);
                if (random == null) {
                    random = new Random();
                    this.validationRandomMap.put(0, random);
                }
            }

            if (this.modelConfig.isFixInitialInput()) {
                // for fix initial input, if hashcode%100 is in [start-hashcode, end-hashcode), validation,
                // otherwise training. start hashcode in different job is different to make sure bagging jobs have
                // different data. if end-hashcode is over 100, then check if hashcode is in [start-hashcode, 100]
                // or [0, end-hashcode]
                int startHashCode = (100 / this.modelConfig.getBaggingNum()) * this.trainerId;
                int endHashCode = startHashCode
                        + Double.valueOf(this.modelConfig.getValidSetRate() * 100).intValue();
                if (isInRange(hashcode, startHashCode, endHashCode)) {
                    this.validationData.append(data);
                    if (isPositive(data.outputs[0])) {
                        this.positiveValidationCount += 1L;
                    } else {
                        this.negativeValidationCount += 1L;
                    }
                    return false;
                } else {
                    this.trainingData.append(data);
                    if (isPositive(data.outputs[0])) {
                        this.positiveTrainCount += 1L;
                    } else {
                        this.negativeTrainCount += 1L;
                    }
                    return true;
                }
            } else {
                // not fixed initial input, if random value >= validRate, training, otherwise validation.
                if (random.nextDouble() >= this.modelConfig.getValidSetRate()) {
                    this.trainingData.append(data);
                    if (isPositive(data.outputs[0])) {
                        this.positiveTrainCount += 1L;
                    } else {
                        this.negativeTrainCount += 1L;
                    }
                    return true;
                } else {
                    this.validationData.append(data);
                    if (isPositive(data.outputs[0])) {
                        this.positiveValidationCount += 1L;
                    } else {
                        this.negativeValidationCount += 1L;
                    }
                    return false;
                }
            }
        } else {
            this.trainingData.append(data);
            if (isPositive(data.outputs[0])) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    }
}
From source file:ubic.gemma.analysis.expression.coexpression.links.LinkAnalysisServiceImpl.java
/**
 * Write links as text. If "known genes only", only known genes will be displayed, even if the probe in question
 * targets other "types" of genes.
 *
 * @param la
 * @param wr
 */
private void writeLinks(final LinkAnalysis la, FilterConfig filterConfig, Writer wr) throws IOException {
    Map<CompositeSequence, Collection<Collection<Gene>>> probeToGeneMap = la.getProbeToGeneMap();
    ObjectArrayList links = la.getKeep();
    double subsetSize = la.getConfig().getSubsetSize();
    List<String> buf = new ArrayList<String>();
    if (la.getConfig().isSubset() && links.size() > subsetSize) {
        la.getConfig().setSubsetUsed(true);
    }
    wr.write(la.getConfig().toString());
    wr.write(filterConfig.toString());

    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(4);

    Integer probeDegreeThreshold = la.getConfig().getProbeDegreeThreshold();

    Transformer officialSymbolExtractor = new Transformer() {
        @Override
        public Object transform(Object input) {
            Gene g = (Gene) input;
            return g.getOfficialSymbol();
        }
    };

    int i = 0;
    int keptLinksCount = 0;
    Random generator = new Random();
    double rand = 0.0;
    double fraction = subsetSize / links.size();
    int skippedDueToDegree = 0;

    for (int n = links.size(); i < n; i++) {
        Object val = links.getQuick(i);
        if (val == null)
            continue;
        Link m = (Link) val;
        Double w = m.getWeight();
        assert w != null;
        int x = m.getx();
        int y = m.gety();

        if (probeDegreeThreshold > 0
                && (la.getProbeDegree(x) > probeDegreeThreshold || la.getProbeDegree(y) > probeDegreeThreshold)) {
            skippedDueToDegree++;
            continue;
        }

        CompositeSequence p1 = la.getProbe(x);
        CompositeSequence p2 = la.getProbe(y);

        Collection<Collection<Gene>> g1 = probeToGeneMap.get(p1);
        Collection<Collection<Gene>> g2 = probeToGeneMap.get(p2);

        List<String> genes1 = new ArrayList<String>();
        for (Collection<Gene> cluster : g1) {
            if (cluster.isEmpty())
                continue;
            String t = StringUtils.join(new TransformIterator(cluster.iterator(), officialSymbolExtractor), ",");
            genes1.add(t);
        }

        List<String> genes2 = new ArrayList<String>();
        for (Collection<Gene> cluster : g2) {
            if (cluster.isEmpty())
                continue;
            String t = StringUtils.join(new TransformIterator(cluster.iterator(), officialSymbolExtractor), ",");
            genes2.add(t);
        }

        if (genes2.size() == 0 || genes1.size() == 0) {
            continue;
        }

        String gene1String = StringUtils.join(genes1.iterator(), "|");
        String gene2String = StringUtils.join(genes2.iterator(), "|");

        if (gene1String.equals(gene2String)) {
            continue;
        }

        if (++keptLinksCount % 50000 == 0) {
            log.info(keptLinksCount + " links retained");
        }

        if (la.getConfig().isSubsetUsed()) {
            rand = generator.nextDouble();
            if (rand > fraction)
                continue;
        }

        buf.add(p1.getId() + "\t" + p2.getId() + "\t" + gene1String + "\t" + gene2String + "\t" + nf.format(w)
                + "\n"); // save links
    }

    wr.write("# totalLinks:" + keptLinksCount + "\n");
    wr.write("# printedLinks:" + buf.size() + "\n");
    wr.write("# skippedDueToHighNodeDegree:" + skippedDueToDegree + "\n");

    for (String line : buf) { // write links to file
        wr.write(line);
    }

    if (la.getConfig().isSubsetUsed()) { // subset option activated
        log.info("Done, " + keptLinksCount + "/" + links.size() + " links kept, " + buf.size()
                + " links printed");
    } else {
        log.info("Done, " + keptLinksCount + "/" + links.size()
                + " links printed (some may have been filtered)");
    }
    wr.flush();
}
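The subsetting step above keeps each link with probability fraction = subsetSize / links.size(), so roughly subsetSize links survive in expectation. A minimal, self-contained sketch of that idiom (all identifiers illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class SubsetSketch {
    public static void main(String[] args) {
        int totalSize = 1_000_000;
        double subsetSize = 50_000;
        double fraction = subsetSize / totalSize; // per-item keep probability
        Random generator = new Random();
        List<Integer> printed = new ArrayList<>();
        for (int i = 0; i < totalSize; i++) {
            if (generator.nextDouble() > fraction) {
                continue; // skip this item
            }
            printed.add(i);
        }
        System.out.println("printed " + printed.size() + " of " + totalSize);
    }
}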
From source file:com.ebay.erl.mobius.core.mapred.MobiusInputSampler.java
@Override
public Object[] getSample(InputFormat inf, JobConf job) throws IOException {
    // the following codes are copied from {@link InputSampler#RandomSampler},
    // but require some modifications.
    InputSplit[] splits = inf.getSplits(job, job.getNumMapTasks());
    ArrayList<DataJoinKey> samples = new ArrayList<DataJoinKey>(this.numSamples);
    int splitsToSample = Math.min(this.maxSplitsSampled, splits.length);

    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);

    // get Sorters
    Sorter[] sorters = null;
    if (job.get(ConfigureConstants.SORTERS, null) != null) {
        // total sort job
        sorters = (Sorter[]) SerializableUtil.deserializeFromBase64(job.get(ConfigureConstants.SORTERS), job);
    } else {
        // there is no sorter, should be reducer/join job
        Column[] keys = (Column[]) SerializableUtil
                .deserializeFromBase64(job.get(ConfigureConstants.ALL_GROUP_KEY_COLUMNS), job);
        sorters = new Sorter[keys.length];
        for (int i = 0; i < keys.length; i++) {
            sorters[i] = new Sorter(keys[i].getInputColumnName(), Ordering.ASC);
        }
    }

    long proportion = 10L;
    while ((int) (this.freq * proportion) == 0) {
        proportion = proportion * 10;
    }
    proportion = 5L * proportion;

    // shuffle splits
    for (int i = 0; i < splits.length; ++i) {
        InputSplit tmp = splits[i];
        int j = r.nextInt(splits.length);
        splits[i] = splits[j];
        splits[j] = tmp;
    }

    SamplingOutputCollector collector = new SamplingOutputCollector();
    for (int i = 0; i < splitsToSample || (i < splits.length && samples.size() < numSamples); i++) {
        LOGGER.info("Sampling from split #" + (i + 1) + ", collected samples:" + samples.size());
        RecordReader<WritableComparable, WritableComparable> reader = inf.getRecordReader(splits[i], job,
                Reporter.NULL);
        WritableComparable key = reader.createKey();
        WritableComparable value = reader.createValue();

        if (!(inf instanceof MobiusDelegatingInputFormat)) {
            // not mobius delegating input format, so the CURRENT_DATASET_ID
            // will not be set by inf#getRecordReader, we set them here.
            //
            // set the current dataset id, as the AbstractMobiusMapper#configure
            // method needs this property.
            job.set(ConfigureConstants.CURRENT_DATASET_ID, job.get(ConfigureConstants.ALL_DATASET_IDS));
        }

        Byte datasetID = Byte.valueOf(job.get(ConfigureConstants.CURRENT_DATASET_ID));
        LOGGER.info("Samples coming from dataset: " + datasetID.toString());

        AbstractMobiusMapper mapper = this.getMapper(inf, splits[i], job);
        mapper.configure(job);

        // reading elements from one split
        long readElement = 0;
        while (reader.next(key, value)) {
            collector.clear();
            Tuple tuple = mapper.parse(key, value);

            readElement++;
            if (readElement > (((long) numSamples) * ((long) proportion))) {
                // a split might be very big (ex: a large gz file),
                // so we just need to read the first numSamples * proportion records
                break;
            }

            if (r.nextDouble() <= freq) {
                if (samples.size() < numSamples) {
                    mapper.joinmap(key, value, collector, Reporter.NULL);
                    // joinmap function might generate more than one output key
                    // per <code>key</code> input.
                    for (Tuple t : collector.getOutKey()) {
                        Tuple mt = Tuple.merge(tuple, t);
                        DataJoinKey nkey = this.getKey(mt, sorters, datasetID, mapper, job);
                        samples.add(nkey);
                    }
                } else {
                    // When exceeding the maximum number of samples, replace
                    // a random element with this one, then adjust the
                    // frequency to reflect the possibility of existing
                    // elements being pushed out
                    mapper.joinmap(key, value, collector, Reporter.NULL);
                    for (Tuple t : collector.getOutKey()) {
                        int ind = r.nextInt(numSamples);
                        if (ind != numSamples) {
                            Tuple mt = Tuple.merge(tuple, t);
                            DataJoinKey nkey = this.getKey(mt, sorters, datasetID, mapper, job);
                            samples.set(ind, nkey);
                        }
                    }
                    freq *= (numSamples - collector.getOutKey().size()) / (double) numSamples;
                }
                key = reader.createKey();
                value = reader.createValue();
            }
        }
        reader.close();
    }
    LOGGER.info("Samples have been collected, return.");
    return samples.toArray();
}
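The sampler above follows Hadoop's RandomSampler pattern: accept each record with probability freq, and once the sample buffer is full, overwrite a random slot and lower freq so earlier picks can still be displaced. A minimal, self-contained sketch of that pattern (identifiers illustrative, one element per record rather than a joinmap expansion):

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class FrequencySamplerSketch {
    public static void main(String[] args) {
        final int numSamples = 10;
        double freq = 0.01; // initial per-record acceptance probability
        Random r = new Random();
        List<Integer> samples = new ArrayList<>(numSamples);
        for (int record = 0; record < 1_000_000; record++) {
            if (r.nextDouble() <= freq) {
                if (samples.size() < numSamples) {
                    samples.add(record);
                } else {
                    // buffer full: replace a random slot, then shrink freq to
                    // keep each record's overall chance of surviving roughly equal
                    samples.set(r.nextInt(numSamples), record);
                    freq *= (numSamples - 1) / (double) numSamples;
                }
            }
        }
        System.out.println(samples);
    }
}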
From source file:ml.shifu.shifu.core.dtrain.nn.AbstractNNWorker.java
/**
 * Add to training set or validation set according to validation rate.
 *
 * @param hashcode
 *            the hash code of the data
 * @param pair
 *            data instance
 * @param isValidation
 *            whether this record is flagged as validation data
 * @return true if the data is added to the training set, false otherwise.
 */
protected boolean addDataPairToDataSet(long hashcode, FloatMLDataPair pair, boolean isValidation) {
    if (this.isKFoldCV) {
        int k = this.modelConfig.getTrain().getNumKFold();
        if (hashcode % k == this.trainerId) {
            this.validationData.add(pair);
            if (isPositive(pair.getIdealArray()[0])) {
                this.positiveValidationCount += 1L;
            } else {
                this.negativeValidationCount += 1L;
            }
            return false;
        } else {
            this.trainingData.add(pair);
            if (isPositive(pair.getIdealArray()[0])) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    }

    if (this.isSpecificValidation) {
        if (isValidation) {
            this.validationData.add(pair);
            if (isPositive(pair.getIdealArray()[0])) {
                this.positiveValidationCount += 1L;
            } else {
                this.negativeValidationCount += 1L;
            }
            return false;
        } else {
            this.trainingData.add(pair);
            if (isPositive(pair.getIdealArray()[0])) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    } else {
        if (Double.compare(this.modelConfig.getValidSetRate(), 0d) != 0) {
            int classValue = (int) (pair.getIdealArray()[0] + 0.01f);
            Random random = null;
            if (this.isStratifiedSampling) {
                // each class use one random instance
                random = validationRandomMap.get(classValue);
                if (random == null) {
                    random = new Random();
                    this.validationRandomMap.put(classValue, random);
                }
            } else {
                // all data use one random instance
                random = validationRandomMap.get(0);
                if (random == null) {
                    random = new Random();
                    this.validationRandomMap.put(0, random);
                }
            }

            if (this.modelConfig.isFixInitialInput()) {
                // for fix initial input, if hashcode%100 is in [start-hashcode, end-hashcode), validation,
                // otherwise training. start hashcode in different job is different to make sure bagging jobs have
                // different data. if end-hashcode is over 100, then check if hashcode is in [start-hashcode, 100]
                // or [0, end-hashcode]
                int startHashCode = (100 / this.modelConfig.getBaggingNum()) * this.trainerId;
                int endHashCode = startHashCode
                        + Double.valueOf(this.modelConfig.getValidSetRate() * 100).intValue();
                if (isInRange(hashcode, startHashCode, endHashCode)) {
                    this.validationData.add(pair);
                    if (isPositive(pair.getIdealArray()[0])) {
                        this.positiveValidationCount += 1L;
                    } else {
                        this.negativeValidationCount += 1L;
                    }
                    return false;
                } else {
                    this.trainingData.add(pair);
                    if (isPositive(pair.getIdealArray()[0])) {
                        this.positiveTrainCount += 1L;
                    } else {
                        this.negativeTrainCount += 1L;
                    }
                    return true;
                }
            } else {
                // not fixed initial input, if random value >= validRate, training, otherwise validation.
                if (random.nextDouble() >= this.modelConfig.getValidSetRate()) {
                    this.trainingData.add(pair);
                    if (isPositive(pair.getIdealArray()[0])) {
                        this.positiveTrainCount += 1L;
                    } else {
                        this.negativeTrainCount += 1L;
                    }
                    return true;
                } else {
                    this.validationData.add(pair);
                    if (isPositive(pair.getIdealArray()[0])) {
                        this.positiveValidationCount += 1L;
                    } else {
                        this.negativeValidationCount += 1L;
                    }
                    return false;
                }
            }
        } else {
            this.trainingData.add(pair);
            if (isPositive(pair.getIdealArray()[0])) {
                this.positiveTrainCount += 1L;
            } else {
                this.negativeTrainCount += 1L;
            }
            return true;
        }
    }
}
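The WDLWorker, LogisticRegressionWorker, and AbstractNNWorker methods above all reduce to the same nextDouble() idiom: draw one uniform value per record and route it to training when the draw is at or above the validation rate, to validation otherwise. A minimal, self-contained sketch of just that split (class and constant names are illustrative, not from Shifu):

import java.util.Random;

public class ValidationSplitSketch {
    private static final double VALID_SET_RATE = 0.2; // ~20% held out

    public static void main(String[] args) {
        Random random = new Random(1L); // one shared generator, as in the workers
        int train = 0, validation = 0;
        for (int i = 0; i < 100_000; i++) {
            if (random.nextDouble() >= VALID_SET_RATE) {
                train++;        // kept for training
            } else {
                validation++;   // held out for validation
            }
        }
        System.out.println("train=" + train + " validation=" + validation);
    }
}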
From source file:org.roaringbitmap.TestRoaringBitmap.java
@Test
public void flipTestBig() {
    final int numCases = 1000;
    System.out.println("flipTestBig for " + numCases + " tests");
    final RoaringBitmap rb = new RoaringBitmap();
    final BitSet bs = new BitSet();
    final Random r = new Random(3333);
    int checkTime = 2;

    for (int i = 0; i < numCases; ++i) {
        final int start = r.nextInt(65536 * 20);
        int end = r.nextInt(65536 * 20);
        if (r.nextDouble() < 0.1)
            end = start + r.nextInt(100);
        rb.flip(start, end);
        if (start < end)
            bs.flip(start, end); // throws exception otherwise

        // insert some more ANDs to keep things sparser
        if (r.nextDouble() < 0.2) {
            final RoaringBitmap mask = new RoaringBitmap();
            final BitSet mask1 = new BitSet();
            final int startM = r.nextInt(65536 * 20);
            final int endM = startM + 100000;
            mask.flip(startM, endM);
            mask1.flip(startM, endM);
            mask.flip(0, 65536 * 20 + 100000);
            mask1.flip(0, 65536 * 20 + 100000);
            rb.and(mask);
            bs.and(mask1);
        }

        // see if we can detect incorrectly shared containers
        if (r.nextDouble() < 0.1) {
            final RoaringBitmap irrelevant = RoaringBitmap.flip(rb, 10, 100000);
            irrelevant.flip(5, 200000);
            irrelevant.flip(190000, 260000);
        }

        if (i > checkTime) {
            Assert.assertTrue(equals(bs, rb));
            checkTime *= 1.5;
        }
    }
}
From source file:org.dllearner.algorithms.qtl.experiments.QTLEvaluation.java
private Pair<List<String>, List<String>> generateNoise(List<String> examples, String sparqlQuery, double noise,
        Random randomGen) {
    // generate noise example candidates
    List<String> noiseCandidateExamples = null;
    switch (noiseMethod) {
    case RANDOM:
        noiseCandidateExamples = generateNoiseCandidatesRandom(examples, 20);
        break;
    case SIMILAR:
        noiseCandidateExamples = generateNoiseCandidatesSimilar(examples, sparqlQuery, 20);
        break;
    case SIMILARITY_PARAMETERIZED: // TODO implement configurable noise method
        break;
    default:
        noiseCandidateExamples = generateNoiseCandidatesRandom(examples, 20);
        break;
    }
    Collections.shuffle(noiseCandidateExamples, randomGen);

    // add some noise by using instances close to the positive examples
    // we have two ways of adding noise t_n
    // 1: iterate over pos. examples and if random number is below t_n, replace the example
    // 2: replace the (#posExamples * t_n) randomly chosen pos. examples by randomly chosen negative examples
    boolean probabilityBased = false;

    if (probabilityBased) {
        // 1. way
        List<String> newExamples = new ArrayList<>();
        for (Iterator<String> iterator = examples.iterator(); iterator.hasNext();) {
            String posExample = iterator.next();
            double rnd = randomGen.nextDouble();
            if (rnd <= noise) {
                // remove the positive example
                iterator.remove();
                // add one of the negative examples
                String negExample = noiseCandidateExamples.remove(0);
                newExamples.add(negExample);
                logger.info("Replacing " + posExample + " by " + negExample);
            }
        }
        examples.addAll(newExamples);
        return null;
    } else {
        // 2. way
        // replace at least 1 but not more than half of the examples
        int upperBound = examples.size() / 2;
        int nrOfPosExamples2Replace = (int) Math.ceil(noise * examples.size());
        nrOfPosExamples2Replace = Math.min(nrOfPosExamples2Replace, upperBound);

        logger.info("replacing " + nrOfPosExamples2Replace + "/" + examples.size()
                + " examples to introduce noise");
        List<String> posExamples2Replace = new ArrayList<>(examples.subList(0, nrOfPosExamples2Replace));
        examples.removeAll(posExamples2Replace);
        List<String> negExamples4Replacement = noiseCandidateExamples.subList(0, nrOfPosExamples2Replace);
        List<String> noiseExamples = new ArrayList<>(negExamples4Replacement);
        List<String> correctExamples = new ArrayList<>(examples);
        examples.addAll(negExamples4Replacement);
        logger.info("replaced " + posExamples2Replace + " by " + negExamples4Replacement);
        return new Pair<>(correctExamples, noiseExamples);
    }
}
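The probability-based branch above replaces each positive example independently with probability `noise`, drawing nextDouble() once per element. A minimal, self-contained sketch of just that branch (identifiers illustrative, not from the QTL code base; requires Java 9+ for List.of):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;

public class NoiseInjectionSketch {
    public static void main(String[] args) {
        double noise = 0.3; // each positive example is replaced with probability 0.3
        Random randomGen = new Random(7L);
        List<String> examples = new ArrayList<>(List.of("a", "b", "c", "d", "e"));
        List<String> candidates = new ArrayList<>(List.of("n1", "n2", "n3", "n4", "n5"));
        List<String> replacements = new ArrayList<>();
        for (Iterator<String> it = examples.iterator(); it.hasNext();) {
            String posExample = it.next();
            if (randomGen.nextDouble() <= noise) {
                it.remove();                            // drop the positive example
                replacements.add(candidates.remove(0)); // swap in a noise candidate
            }
        }
        examples.addAll(replacements);
        System.out.println(examples);
    }
}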
From source file:org.roaringbitmap.TestRoaringBitmap.java
@Test
public void flipTestBigA() {
    final int numCases = 1000;
    final BitSet bs = new BitSet();
    final Random r = new Random(3333);
    int checkTime = 2;
    RoaringBitmap rb1 = new RoaringBitmap(), rb2 = null; // alternate between them

    for (int i = 0; i < numCases; ++i) {
        final int start = r.nextInt(65536 * 20);
        int end = r.nextInt(65536 * 20);
        if (r.nextDouble() < 0.1)
            end = start + r.nextInt(100);

        if ((i & 1) == 0) {
            rb2 = RoaringBitmap.flip(rb1, start, end);
            // tweak the other, catch bad sharing
            int r1 = r.nextInt(65536 * 20);
            int r2 = r.nextInt(65536 * 20);
            rb1.flip(r1, r2);
        } else {
            rb1 = RoaringBitmap.flip(rb2, start, end);
            int r1 = r.nextInt(65536 * 20);
            int r2 = r.nextInt(65536 * 20);
            rb2.flip(r1, r2);
        }

        if (start < end) {
            bs.flip(start, end); // throws exception otherwise
        }

        // insert some more ANDs to keep things sparser
        if (r.nextDouble() < 0.2 && (i & 1) == 0) {
            final RoaringBitmap mask = new RoaringBitmap();
            final BitSet mask1 = new BitSet();
            final int startM = r.nextInt(65536 * 20);
            final int endM = startM + 100000;
            mask.flip(startM, endM);
            mask1.flip(startM, endM);
            mask.flip(0, 65536 * 20 + 100000);
            mask1.flip(0, 65536 * 20 + 100000);
            rb2.and(mask);
            bs.and(mask1);
        }

        if (i > checkTime) {
            System.out.println("check after " + i + ", card = " + rb2.getCardinality());
            final RoaringBitmap rb = (i & 1) == 0 ? rb2 : rb1;
            final boolean status = equals(bs, rb);
            Assert.assertTrue(status);
            checkTime *= 1.5;
        }
    }
}
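Both RoaringBitmap tests combine a fixed seed (new Random(3333)) with nextDouble() thresholds, so rare branches fire at a controlled rate while every run stays reproducible. A minimal, self-contained sketch of that testing pattern (identifiers illustrative):

import java.util.Random;

public class SeededGateSketch {
    public static void main(String[] args) {
        Random r = new Random(3333); // fixed seed: same branch pattern every run
        int shortRanges = 0, maskRounds = 0;
        for (int i = 0; i < 1000; i++) {
            if (r.nextDouble() < 0.1) shortRanges++; // rare branch, ~10% of iterations
            if (r.nextDouble() < 0.2) maskRounds++;  // sparsifying branch, ~20%
        }
        System.out.println(shortRanges + " short ranges, " + maskRounds + " mask rounds");
    }
}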