List of usage examples for java.util.BitSet.get
public boolean get(int bitIndex)
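Before the longer examples, here is a minimal, self-contained sketch (hypothetical class name BitSetGetDemo) showing what get(int bitIndex) returns for set, clear, and out-of-range indices:

import java.util.BitSet;

public class BitSetGetDemo {
  public static void main(String[] args) {
    BitSet bits = new BitSet(8);
    bits.set(1);
    bits.set(5);

    // get(int) returns true only for indices that have been set
    System.out.println(bits.get(1));   // true
    System.out.println(bits.get(2));   // false
    // indices beyond the logical size are simply false; only negative indices throw
    System.out.println(bits.get(100)); // false
  }
}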
From source file: org.apache.carbondata.hadoop.api.CarbonInputFormat.java
/**
 * get data blocks of given segment
 */
protected List<CarbonInputSplit> getDataBlocksOfSegment(JobContext job, CarbonTable carbonTable,
    Expression expression, BitSet matchedPartitions, List<Segment> segmentIds,
    PartitionInfo partitionInfo, List<Integer> oldPartitionIdList) throws IOException {
  QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
  QueryStatistic statistic = new QueryStatistic();

  // get tokens for all the required FileSystem for table path
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { new Path(carbonTable.getTablePath()) }, job.getConfiguration());
  List<ExtendedBlocklet> prunedBlocklets = getPrunedBlocklets(job, carbonTable, expression, segmentIds);

  List<CarbonInputSplit> resultFilteredBlocks = new ArrayList<>();
  int partitionIndex = 0;
  List<Integer> partitionIdList = new ArrayList<>();
  if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
    partitionIdList = partitionInfo.getPartitionIds();
  }
  for (ExtendedBlocklet blocklet : prunedBlocklets) {
    // OldPartitionIdList is only used in alter table partition command because it change
    // partition info first and then read data.
    // For other normal query should use newest partitionIdList
    if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
      long partitionId = CarbonTablePath.DataFileUtil
          .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath()));
      if (oldPartitionIdList != null) {
        partitionIndex = oldPartitionIdList.indexOf((int) partitionId);
      } else {
        partitionIndex = partitionIdList.indexOf((int) partitionId);
      }
    }
    if (partitionIndex != -1) {
      // matchedPartitions variable will be null in two cases as follows
      // 1. the table is not a partition table
      // 2. the table is a partition table, and all partitions are matched by query
      // for partition table, the task id of carbaondata file name is the partition id.
      // if this partition is not required, here will skip it.
      if (matchedPartitions == null || matchedPartitions.get(partitionIndex)) {
        resultFilteredBlocks.add(blocklet.getInputSplit());
      }
    }
  }
  statistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
  recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
  return resultFilteredBlocks;
}
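The pruning loop above relies on a simple convention: a null matchedPartitions BitSet means every partition is accepted, while otherwise get(partitionIndex) acts as the filter. A minimal sketch of that idiom (hypothetical names, not the CarbonData API):

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

class PartitionFilterSketch {
  // Keep block i when there is no filter (null BitSet) or its bit is set.
  static List<String> prune(List<String> blocks, BitSet matchedPartitions) {
    List<String> kept = new ArrayList<>();
    for (int i = 0; i < blocks.size(); i++) {
      if (matchedPartitions == null || matchedPartitions.get(i)) {
        kept.add(blocks.get(i));
      }
    }
    return kept;
  }
}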
From source file: org.apache.hadoop.hdfs.TestReconstructStripedFile.java
/**
 * Test the file blocks reconstruction.
 * 1. Check the replica is reconstructed in the target datanode,
 *    and verify the block replica length, generationStamp and content.
 * 2. Read the file and verify content.
 */
private void assertFileBlocksReconstruction(String fileName, int fileLen, ReconstructionType type,
    int toRecoverBlockNum) throws Exception {
  if (toRecoverBlockNum < 1 || toRecoverBlockNum > parityBlkNum) {
    Assert.fail("toRecoverBlockNum should be between 1 ~ " + parityBlkNum);
  }
  assertTrue("File length must be positive.", fileLen > 0);

  Path file = new Path(fileName);
  final byte[] data = new byte[fileLen];
  Arrays.fill(data, (byte) 1);
  DFSTestUtil.writeFile(fs, file, data);
  StripedFileTestUtil.waitBlockGroupsReported(fs, fileName);

  LocatedBlocks locatedBlocks = StripedFileTestUtil.getLocatedBlocks(file, fs);
  assertEquals(locatedBlocks.getFileLength(), fileLen);

  LocatedStripedBlock lastBlock = (LocatedStripedBlock) locatedBlocks.getLastLocatedBlock();
  DatanodeInfo[] storageInfos = lastBlock.getLocations();
  byte[] indices = lastBlock.getBlockIndices();

  BitSet bitset = new BitSet(dnNum);
  for (DatanodeInfo storageInfo : storageInfos) {
    bitset.set(dnMap.get(storageInfo));
  }

  int[] dead = generateDeadDnIndices(type, toRecoverBlockNum, indices);
  LOG.info("Note: indices == " + Arrays.toString(indices)
      + ". Generate errors on datanodes: " + Arrays.toString(dead));
  DatanodeInfo[] dataDNs = new DatanodeInfo[toRecoverBlockNum];
  int[] deadDnIndices = new int[toRecoverBlockNum];
  ExtendedBlock[] blocks = new ExtendedBlock[toRecoverBlockNum];
  File[] replicas = new File[toRecoverBlockNum];
  long[] replicaLengths = new long[toRecoverBlockNum];
  File[] metadatas = new File[toRecoverBlockNum];
  byte[][] replicaContents = new byte[toRecoverBlockNum][];
  Map<ExtendedBlock, DataNode> errorMap = new HashMap<>(dead.length);
  for (int i = 0; i < toRecoverBlockNum; i++) {
    dataDNs[i] = storageInfos[dead[i]];
    deadDnIndices[i] = dnMap.get(dataDNs[i]);

    // Check the block replica file on deadDn before it dead.
    blocks[i] = StripedBlockUtil.constructInternalBlock(lastBlock.getBlock(), cellSize, dataBlkNum,
        indices[dead[i]]);
    errorMap.put(blocks[i], cluster.getDataNodes().get(deadDnIndices[i]));
    replicas[i] = cluster.getBlockFile(deadDnIndices[i], blocks[i]);
    replicaLengths[i] = replicas[i].length();
    metadatas[i] = cluster.getBlockMetadataFile(deadDnIndices[i], blocks[i]);
    // the block replica on the datanode should be the same as expected
    assertEquals(replicaLengths[i], StripedBlockUtil.getInternalBlockLength(
        lastBlock.getBlockSize(), cellSize, dataBlkNum, indices[dead[i]]));
    assertTrue(metadatas[i].getName().endsWith(blocks[i].getGenerationStamp() + ".meta"));
    LOG.info("replica " + i + " locates in file: " + replicas[i]);
    replicaContents[i] = DFSTestUtil.readFileAsBytes(replicas[i]);
  }

  int lastGroupDataLen = fileLen % (dataBlkNum * blockSize);
  int lastGroupNumBlk = lastGroupDataLen == 0 ? dataBlkNum
      : Math.min(dataBlkNum, ((lastGroupDataLen - 1) / cellSize + 1));
  int groupSize = lastGroupNumBlk + parityBlkNum;

  // shutdown datanodes or generate corruption
  int stoppedDN = generateErrors(errorMap, type);

  // Check the locatedBlocks of the file again
  locatedBlocks = StripedFileTestUtil.getLocatedBlocks(file, fs);
  lastBlock = (LocatedStripedBlock) locatedBlocks.getLastLocatedBlock();
  storageInfos = lastBlock.getLocations();
  assertEquals(storageInfos.length, groupSize - stoppedDN);

  int[] targetDNs = new int[dnNum - groupSize];
  int n = 0;
  for (int i = 0; i < dnNum; i++) {
    if (!bitset.get(i)) { // not contain replica of the block.
      targetDNs[n++] = i;
    }
  }

  StripedFileTestUtil.waitForReconstructionFinished(file, fs, groupSize);

  targetDNs = sortTargetsByReplicas(blocks, targetDNs);

  // Check the replica on the new target node.
  for (int i = 0; i < toRecoverBlockNum; i++) {
    File replicaAfterReconstruction = cluster.getBlockFile(targetDNs[i], blocks[i]);
    LOG.info("replica after reconstruction " + replicaAfterReconstruction);
    File metadataAfterReconstruction = cluster.getBlockMetadataFile(targetDNs[i], blocks[i]);
    assertEquals(replicaLengths[i], replicaAfterReconstruction.length());
    LOG.info("replica before " + replicas[i]);
    assertTrue(metadataAfterReconstruction.getName().endsWith(blocks[i].getGenerationStamp() + ".meta"));
    byte[] replicaContentAfterReconstruction = DFSTestUtil.readFileAsBytes(replicaAfterReconstruction);
    Assert.assertArrayEquals(replicaContents[i], replicaContentAfterReconstruction);
  }
}
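In the test above, the BitSet records which datanodes already hold a replica of the block group, and !bitset.get(i) then selects reconstruction targets from the remaining nodes. A minimal sketch of that selection step, with hypothetical names and no HDFS dependencies:

import java.util.BitSet;

class TargetDnSketch {
  // Return the indices of datanodes whose bit is NOT set, i.e. nodes that
  // do not yet hold a replica and can serve as reconstruction targets.
  static int[] freeDatanodes(BitSet holdsReplica, int dnNum) {
    int[] targets = new int[dnNum - holdsReplica.cardinality()];
    int n = 0;
    for (int i = 0; i < dnNum; i++) {
      if (!holdsReplica.get(i)) {
        targets[n++] = i;
      }
    }
    return targets;
  }
}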
From source file: android.support.v7.widget.StaggeredGridLayoutManager2.java
/**
 * Checks for gaps if we've reached to the top of the list.
 * <p>
 * Intermediate gaps created by full span items are tracked via mLaidOutInvalidFullSpan field.
 */
View hasGapsToFix() {
  int startChildIndex = 0;
  int endChildIndex = getChildCount() - 1;
  BitSet mSpansToCheck = new BitSet(mSpanCount);
  mSpansToCheck.set(0, mSpanCount, true);

  final int firstChildIndex, childLimit;
  final int preferredSpanDir = mOrientation == VERTICAL && isLayoutRTL() ? 1 : -1;
  if (mShouldReverseLayout) {
    firstChildIndex = endChildIndex - 1;
    childLimit = startChildIndex - 1;
  } else {
    firstChildIndex = startChildIndex;
    childLimit = endChildIndex;
  }
  final int nextChildDiff = firstChildIndex < childLimit ? 1 : -1;
  for (int i = firstChildIndex; i != childLimit; i += nextChildDiff) {
    View child = getChildAt(i);
    LayoutParams lp = (LayoutParams) child.getLayoutParams();
    if (mSpansToCheck.get(lp.mSpan.mIndex)) {
      if (checkSpanForGap(lp.mSpan)) {
        return child;
      }
      mSpansToCheck.clear(lp.mSpan.mIndex);
    }
    if (lp.mFullSpan) {
      continue; // quick reject
    }
    if (i + nextChildDiff != childLimit) {
      View nextChild = getChildAt(i + nextChildDiff);
      boolean compareSpans = false;
      if (mShouldReverseLayout) {
        // ensure child's end is below nextChild's end
        int myEnd = mPrimaryOrientation.getDecoratedEnd(child);
        int nextEnd = mPrimaryOrientation.getDecoratedEnd(nextChild);
        if (myEnd < nextEnd) {
          return child; // i should have a better position
        } else if (myEnd == nextEnd) {
          compareSpans = true;
        }
      } else {
        int myStart = mPrimaryOrientation.getDecoratedStart(child);
        int nextStart = mPrimaryOrientation.getDecoratedStart(nextChild);
        if (myStart > nextStart) {
          return child; // i should have a better position
        } else if (myStart == nextStart) {
          compareSpans = true;
        }
      }
      if (compareSpans) { // equal, check span indices.
        LayoutParams nextLp = (LayoutParams) nextChild.getLayoutParams();
        if (lp.mSpan.mIndex - nextLp.mSpan.mIndex < 0 != preferredSpanDir < 0) {
          return child;
        }
      }
    }
  }
  // everything looks good
  return null;
}
From source file: org.apache.openjpa.kernel.StateManagerImpl.java
/**
 * Rollback state of the managed instance to the given savepoint.
 */
void rollbackToSavepoint(SavepointFieldManager savepoint) {
  _state = savepoint.getPCState();
  BitSet loaded = savepoint.getLoaded();
  for (int i = 0, len = loaded.length(); i < len; i++) {
    if (loaded.get(i) && savepoint.restoreField(i)) {
      provideField(savepoint.getCopy(), savepoint, i);
      replaceField(_pc, savepoint, i);
    }
  }
  _loaded = loaded;
  _dirty = savepoint.getDirty();
  _flush = savepoint.getFlushed();
  _version = savepoint.getVersion();
  _loadVersion = savepoint.getLoadVersion();
}
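The restore loop above scans every index up to loaded.length() and tests each with get(i). A small sketch of that pattern, plus the equivalent nextSetBit(...) traversal that visits only the set bits (hypothetical names, not the OpenJPA API):

import java.util.BitSet;

class RestoreLoopSketch {
  static void restore(BitSet loaded) {
    // Scan every index; length() is one past the highest set bit.
    for (int i = 0, len = loaded.length(); i < len; i++) {
      if (loaded.get(i)) {
        System.out.println("restore field " + i);
      }
    }
    // Same visit order, skipping clear bits directly.
    for (int i = loaded.nextSetBit(0); i >= 0; i = loaded.nextSetBit(i + 1)) {
      System.out.println("restore field " + i);
    }
  }
}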
From source file: android.support.v7.widget.StaggeredGridLayoutManager.java
/**
 * Checks for gaps if we've reached to the top of the list.
 * <p>
 * Intermediate gaps created by full span items are tracked via mLaidOutInvalidFullSpan field.
 */
View hasGapsToFix() {
  int startChildIndex = 0;
  int endChildIndex = getChildCount() - 1;
  BitSet mSpansToCheck = new BitSet(mSpanCount);
  mSpansToCheck.set(0, mSpanCount, true);

  final int firstChildIndex, childLimit;
  final int preferredSpanDir = mOrientation == VERTICAL && isLayoutRTL() ? 1 : -1;
  if (mShouldReverseLayout) {
    firstChildIndex = endChildIndex;
    childLimit = startChildIndex - 1;
  } else {
    firstChildIndex = startChildIndex;
    childLimit = endChildIndex + 1;
  }
  final int nextChildDiff = firstChildIndex < childLimit ? 1 : -1;
  for (int i = firstChildIndex; i != childLimit; i += nextChildDiff) {
    View child = getChildAt(i);
    LayoutParams lp = (LayoutParams) child.getLayoutParams();
    if (mSpansToCheck.get(lp.mSpan.mIndex)) {
      if (checkSpanForGap(lp.mSpan)) {
        return child;
      }
      mSpansToCheck.clear(lp.mSpan.mIndex);
    }
    if (lp.mFullSpan) {
      continue; // quick reject
    }
    if (i + nextChildDiff != childLimit) {
      View nextChild = getChildAt(i + nextChildDiff);
      boolean compareSpans = false;
      if (mShouldReverseLayout) {
        // ensure child's end is below nextChild's end
        int myEnd = mPrimaryOrientation.getDecoratedEnd(child);
        int nextEnd = mPrimaryOrientation.getDecoratedEnd(nextChild);
        if (myEnd < nextEnd) {
          return child; // i should have a better position
        } else if (myEnd == nextEnd) {
          compareSpans = true;
        }
      } else {
        int myStart = mPrimaryOrientation.getDecoratedStart(child);
        int nextStart = mPrimaryOrientation.getDecoratedStart(nextChild);
        if (myStart > nextStart) {
          return child; // i should have a better position
        } else if (myStart == nextStart) {
          compareSpans = true;
        }
      }
      if (compareSpans) { // equal, check span indices.
        LayoutParams nextLp = (LayoutParams) nextChild.getLayoutParams();
        if (lp.mSpan.mIndex - nextLp.mSpan.mIndex < 0 != preferredSpanDir < 0) {
          return child;
        }
      }
    }
  }
  // everything looks good
  return null;
}
From source file: com.jefftharris.passwdsafe.PasswdSafe.java
@Override
public boolean onPrepareOptionsMenu(Menu menu) {
  final BitSet options = new BitSet();
  options.set(MENU_BIT_HAS_CLOSE);

  itsFileDataFrag.useFileData(new PasswdFileDataUser() {
    @Override
    public void useFileData(@NonNull PasswdFileData fileData) {
      boolean fileEditable = fileData.canEdit();
      switch (itsCurrViewMode) {
      case VIEW_LIST: {
        options.set(MENU_BIT_CAN_ADD, fileEditable);
        options.set(MENU_BIT_HAS_SEARCH, true);
        if (fileEditable) {
          options.set(MENU_BIT_HAS_FILE_OPS, true);
          options.set(MENU_BIT_HAS_FILE_CHANGE_PASSWORD, fileData.isNotYubikey());
          options.set(MENU_BIT_HAS_FILE_PROTECT, true);
          options.set(MENU_BIT_PROTECT_ALL, itsLocation.getGroups().isEmpty());
        }
        if (fileData.canDelete()) {
          options.set(MENU_BIT_HAS_FILE_OPS, true);
          options.set(MENU_BIT_HAS_FILE_DELETE, true);
        }
        break;
      }
      case VIEW_RECORD: {
        options.set(MENU_BIT_CAN_ADD, fileEditable);
        break;
      }
      case INIT:
      case FILE_OPEN:
      case FILE_NEW:
      case VIEW_ABOUT:
      case VIEW_EXPIRATION:
      case VIEW_POLICY_LIST:
      case VIEW_PREFERENCES: {
        break;
      }
      case EDIT_RECORD:
      case CHANGING_PASSWORD: {
        options.set(MENU_BIT_HAS_CLOSE, false);
        break;
      }
      }
    }
  });

  MenuItem item = menu.findItem(R.id.menu_add);
  if (item != null) {
    item.setVisible(options.get(MENU_BIT_CAN_ADD));
  }

  item = menu.findItem(R.id.menu_close);
  if (item != null) {
    item.setVisible(options.get(MENU_BIT_HAS_CLOSE));
  }

  item = menu.findItem(R.id.menu_file_ops);
  if (item != null) {
    item.setVisible(options.get(MENU_BIT_HAS_FILE_OPS));
  }

  item = menu.findItem(R.id.menu_file_change_password);
  if (item != null) {
    item.setEnabled(options.get(MENU_BIT_HAS_FILE_CHANGE_PASSWORD));
  }

  if (options.get(MENU_BIT_HAS_FILE_OPS)) {
    boolean hasProtect = options.get(MENU_BIT_HAS_FILE_PROTECT);
    boolean viewProtectAll = options.get(MENU_BIT_PROTECT_ALL);
    item = menu.findItem(R.id.menu_file_protect_records);
    if (item != null) {
      item.setEnabled(hasProtect);
      item.setTitle(viewProtectAll ? R.string.protect_all : R.string.protect_group);
    }
    item = menu.findItem(R.id.menu_file_unprotect_records);
    if (item != null) {
      item.setEnabled(hasProtect);
      item.setTitle(viewProtectAll ? R.string.unprotect_all : R.string.unprotect_group);
    }

    item = menu.findItem(R.id.menu_file_delete);
    if (item != null) {
      item.setEnabled(options.get(MENU_BIT_HAS_FILE_DELETE));
    }
  }

  item = menu.findItem(R.id.menu_search);
  if (item != null) {
    item.setVisible(options.get(MENU_BIT_HAS_SEARCH));
  }

  return super.onPrepareOptionsMenu(menu);
}
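The menu example above treats a BitSet as a compact set of named boolean flags: integer constants name the bit positions, set(...) records capabilities, and get(...) later drives visibility. A stripped-down sketch of that idiom with hypothetical constants and no Android dependencies:

import java.util.BitSet;

class MenuOptionsSketch {
  static final int BIT_CAN_ADD = 0;
  static final int BIT_HAS_CLOSE = 1;
  static final int BIT_HAS_SEARCH = 2;

  public static void main(String[] args) {
    BitSet options = new BitSet();
    options.set(BIT_HAS_CLOSE);
    options.set(BIT_CAN_ADD, /* fileEditable = */ true); // set(int, boolean) variant

    // Unset bits simply read back as false.
    System.out.println("show add:    " + options.get(BIT_CAN_ADD));
    System.out.println("show close:  " + options.get(BIT_HAS_CLOSE));
    System.out.println("show search: " + options.get(BIT_HAS_SEARCH));
  }
}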
From source file: org.apache.hadoop.hdfs.TestRecoverStripedFile.java
/**
 * Test the file blocks recovery.
 * 1. Check the replica is recovered in the target datanode,
 *    and verify the block replica length, generationStamp and content.
 * 2. Read the file and verify content.
 */
private void assertFileBlocksRecovery(String fileName, int fileLen, int recovery, int toRecoverBlockNum)
    throws Exception {
  if (recovery != 0 && recovery != 1 && recovery != 2) {
    Assert.fail("Invalid recovery: 0 is to recovery parity blocks,"
        + "1 is to recovery data blocks, 2 is any.");
  }
  if (toRecoverBlockNum < 1 || toRecoverBlockNum > parityBlkNum) {
    Assert.fail("toRecoverBlockNum should be between 1 ~ " + parityBlkNum);
  }

  Path file = new Path(fileName);
  final byte[] data = new byte[fileLen];
  ThreadLocalRandom.current().nextBytes(data);
  DFSTestUtil.writeFile(fs, file, data);
  StripedFileTestUtil.waitBlockGroupsReported(fs, fileName);

  LocatedBlocks locatedBlocks = getLocatedBlocks(file);
  assertEquals(locatedBlocks.getFileLength(), fileLen);

  LocatedStripedBlock lastBlock = (LocatedStripedBlock) locatedBlocks.getLastLocatedBlock();
  DatanodeInfo[] storageInfos = lastBlock.getLocations();
  byte[] indices = lastBlock.getBlockIndices();

  BitSet bitset = new BitSet(dnNum);
  for (DatanodeInfo storageInfo : storageInfos) {
    bitset.set(dnMap.get(storageInfo));
  }

  int[] toDead = new int[toRecoverBlockNum];
  int n = 0;
  for (int i = 0; i < indices.length; i++) {
    if (n < toRecoverBlockNum) {
      if (recovery == 0) {
        if (indices[i] >= dataBlkNum) {
          toDead[n++] = i;
        }
      } else if (recovery == 1) {
        if (indices[i] < dataBlkNum) {
          toDead[n++] = i;
        }
      } else {
        toDead[n++] = i;
      }
    } else {
      break;
    }
  }

  DatanodeInfo[] dataDNs = new DatanodeInfo[toRecoverBlockNum];
  int[] deadDnIndices = new int[toRecoverBlockNum];
  ExtendedBlock[] blocks = new ExtendedBlock[toRecoverBlockNum];
  File[] replicas = new File[toRecoverBlockNum];
  File[] metadatas = new File[toRecoverBlockNum];
  byte[][] replicaContents = new byte[toRecoverBlockNum][];
  for (int i = 0; i < toRecoverBlockNum; i++) {
    dataDNs[i] = storageInfos[toDead[i]];
    deadDnIndices[i] = dnMap.get(dataDNs[i]);

    // Check the block replica file on deadDn before it dead.
    blocks[i] = StripedBlockUtil.constructInternalBlock(lastBlock.getBlock(), cellSize, dataBlkNum,
        indices[toDead[i]]);
    replicas[i] = cluster.getBlockFile(deadDnIndices[i], blocks[i]);
    metadatas[i] = cluster.getBlockMetadataFile(deadDnIndices[i], blocks[i]);
    // the block replica on the datanode should be the same as expected
    assertEquals(replicas[i].length(), StripedBlockUtil.getInternalBlockLength(
        lastBlock.getBlockSize(), cellSize, dataBlkNum, indices[toDead[i]]));
    assertTrue(metadatas[i].getName().endsWith(blocks[i].getGenerationStamp() + ".meta"));
    replicaContents[i] = DFSTestUtil.readFileAsBytes(replicas[i]);
  }

  int cellsNum = (fileLen - 1) / cellSize + 1;
  int groupSize = Math.min(cellsNum, dataBlkNum) + parityBlkNum;

  for (int i = 0; i < toRecoverBlockNum; i++) {
    /*
     * Kill the datanode which contains one replica
     * We need to make sure it dead in namenode: clear its update time and
     * trigger NN to check heartbeat.
     */
    DataNode dn = cluster.getDataNodes().get(deadDnIndices[i]);
    dn.shutdown();
    cluster.setDataNodeDead(dn.getDatanodeId());
  }

  // Check the locatedBlocks of the file again
  locatedBlocks = getLocatedBlocks(file);
  lastBlock = (LocatedStripedBlock) locatedBlocks.getLastLocatedBlock();
  storageInfos = lastBlock.getLocations();
  assertEquals(storageInfos.length, groupSize - toRecoverBlockNum);

  int[] targetDNs = new int[dnNum - groupSize];
  n = 0;
  for (int i = 0; i < dnNum; i++) {
    if (!bitset.get(i)) { // not contain replica of the block.
      targetDNs[n++] = i;
    }
  }

  waitForRecoveryFinished(file, groupSize);

  targetDNs = sortTargetsByReplicas(blocks, targetDNs);

  // Check the replica on the new target node.
  for (int i = 0; i < toRecoverBlockNum; i++) {
    File replicaAfterRecovery = cluster.getBlockFile(targetDNs[i], blocks[i]);
    File metadataAfterRecovery = cluster.getBlockMetadataFile(targetDNs[i], blocks[i]);
    assertEquals(replicaAfterRecovery.length(), replicas[i].length());
    assertTrue(metadataAfterRecovery.getName().endsWith(blocks[i].getGenerationStamp() + ".meta"));
    byte[] replicaContentAfterRecovery = DFSTestUtil.readFileAsBytes(replicaAfterRecovery);
    Assert.assertArrayEquals(replicaContents[i], replicaContentAfterRecovery);
  }
}
From source file: org.apache.openjpa.kernel.StateManagerImpl.java
/**
 * Internal version of {@link OpenJPAStateManager#getUnloaded} that avoids
 * creating an empty bit set by returning null when there are no unloaded
 * fields.
 */
private BitSet getUnloadedInternal(FetchConfiguration fetch, int mode, BitSet exclude) {
  if (exclude == StoreContext.EXCLUDE_ALL)
    return null;

  BitSet fields = null;
  FieldMetaData[] fmds = _meta.getFields();
  boolean load;
  for (int i = 0; i < fmds.length; i++) {
    if (_loaded.get(i) || (exclude != null && exclude.get(i)))
      continue;

    switch (mode) {
    case LOAD_SERIALIZE:
      load = !fmds[i].isTransient();
      break;
    case LOAD_FGS:
      load = fetch == null || fetch.requiresFetch(fmds[i]) != FetchConfiguration.FETCH_NONE;
      break;
    default: // LOAD_ALL
      load = true;
    }

    if (load) {
      if (fields == null)
        fields = new BitSet(fmds.length);
      fields.set(i);
    }
  }
  return fields;
}
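Besides get(i) as a per-field check, the method above shows a lazy-allocation idiom: the result BitSet is only created once the first unloaded field is found, and null stands for "nothing to load". A minimal sketch of that pattern under hypothetical names:

import java.util.BitSet;

class LazyBitSetSketch {
  // Returns a BitSet of the fields that still need loading, or null if none do.
  static BitSet unloaded(BitSet loaded, int fieldCount) {
    BitSet fields = null;
    for (int i = 0; i < fieldCount; i++) {
      if (loaded.get(i)) {
        continue; // already loaded
      }
      if (fields == null) {
        fields = new BitSet(fieldCount); // allocate only when actually needed
      }
      fields.set(i);
    }
    return fields;
  }
}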
From source file: com.tamingtext.util.SplitInput.java
/**
 * Perform a split on the specified input file. Results will be written to files of the same name in the specified
 * training and test output directories. The {@link #validate()} method is called prior to executing the split.
 */
public void splitFile(Path inputFile) throws IOException {
  if (fs.getFileStatus(inputFile) == null) {
    throw new IOException(inputFile + " does not exist");
  } else if (fs.getFileStatus(inputFile).isDir()) {
    throw new IOException(inputFile + " is a directory");
  }

  validate();

  Path testOutputFile = new Path(testOutputDirectory, inputFile.getName());
  Path trainingOutputFile = new Path(trainingOutputDirectory, inputFile.getName());

  int lineCount = countLines(fs, inputFile, charset);
  log.info("{} has {} lines", inputFile.getName(), lineCount);

  int testSplitStart = 0;
  int testSplitSize = this.testSplitSize; // don't modify state
  BitSet randomSel = null;

  if (testRandomSelectionPct > 0 || testRandomSelectionSize > 0) {
    testSplitSize = this.testRandomSelectionSize;
    if (testRandomSelectionPct > 0) {
      testSplitSize = Math.round(lineCount * (testRandomSelectionPct / 100.0f));
    }
    log.info("{} test split size is {} based on random selection percentage {}",
        new Object[] { inputFile.getName(), testSplitSize, testRandomSelectionPct });
    long[] ridx = new long[testSplitSize];
    RandomSampler.sample(testSplitSize, lineCount - 1, testSplitSize, 0, ridx, 0, RandomUtils.getRandom());
    randomSel = new BitSet(lineCount);
    for (long idx : ridx) {
      randomSel.set((int) idx + 1);
    }
  } else {
    if (testSplitPct > 0) { // calculate split size based on percentage
      testSplitSize = Math.round(lineCount * (testSplitPct / 100.0f));
      log.info("{} test split size is {} based on percentage {}",
          new Object[] { inputFile.getName(), testSplitSize, testSplitPct });
    } else {
      log.info("{} test split size is {}", inputFile.getName(), testSplitSize);
    }

    if (splitLocation > 0) { // calculate start of split based on percentage
      testSplitStart = Math.round(lineCount * (splitLocation / 100.0f));
      if (lineCount - testSplitStart < testSplitSize) {
        // adjust split start downwards based on split size.
        testSplitStart = lineCount - testSplitSize;
      }
      log.info("{} test split start is {} based on split location {}",
          new Object[] { inputFile.getName(), testSplitStart, splitLocation });
    }

    if (testSplitStart < 0) {
      throw new IllegalArgumentException(
          "test split size for " + inputFile + " is too large, it would produce an "
              + "empty training set from the initial set of " + lineCount + " examples");
    } else if ((lineCount - testSplitSize) < testSplitSize) {
      log.warn("Test set size for {} may be too large, {} is larger than the number of "
          + "lines remaining in the training set: {}",
          new Object[] { inputFile, testSplitSize, lineCount - testSplitSize });
    }
  }

  BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset));
  Writer trainingWriter = new OutputStreamWriter(fs.create(trainingOutputFile), charset);
  Writer testWriter = new OutputStreamWriter(fs.create(testOutputFile), charset);

  int pos = 0;
  int trainCount = 0;
  int testCount = 0;

  String line;
  while ((line = reader.readLine()) != null) {
    pos++;

    Writer writer;
    if (testRandomSelectionPct > 0) { // Randomly choose
      writer = randomSel.get(pos) ? testWriter : trainingWriter;
    } else { // Choose based on location
      writer = pos > testSplitStart ? testWriter : trainingWriter;
    }

    if (writer == testWriter) {
      if (testCount >= testSplitSize) {
        writer = trainingWriter;
      } else {
        testCount++;
      }
    }
    if (writer == trainingWriter) {
      trainCount++;
    }
    writer.write(line);
    writer.write('\n');
  }

  IOUtils.close(Collections.singleton(trainingWriter));
  IOUtils.close(Collections.singleton(testWriter));

  log.info("file: {}, input: {} train: {}, test: {} starting at {}",
      new Object[] { inputFile.getName(), lineCount, trainCount, testCount, testSplitStart });

  // testing;
  if (callback != null) {
    callback.splitComplete(inputFile, lineCount, trainCount, testCount, testSplitStart);
  }
}
From source file: org.apache.mahout.classifier.bayes.SplitBayesInput.java
/**
 * Perform a split on the specified input file. Results will be written to files of the same name in the specified
 * training and test output directories. The {@link #validate()} method is called prior to executing the split.
 */
public void splitFile(Path inputFile) throws IOException {
  if (fs.getFileStatus(inputFile) == null) {
    throw new IOException(inputFile + " does not exist");
  } else if (fs.getFileStatus(inputFile).isDir()) {
    throw new IOException(inputFile + " is a directory");
  }

  validate();

  Path testOutputFile = new Path(testOutputDirectory, inputFile.getName());
  Path trainingOutputFile = new Path(trainingOutputDirectory, inputFile.getName());

  int lineCount = countLines(fs, inputFile, charset);
  log.info("{} has {} lines", inputFile.getName(), lineCount);

  int testSplitStart = 0;
  int testSplitSize = this.testSplitSize; // don't modify state
  BitSet randomSel = null;

  if (testRandomSelectionPct > 0 || testRandomSelectionSize > 0) {
    testSplitSize = this.testRandomSelectionSize;
    if (testRandomSelectionPct > 0) {
      testSplitSize = Math.round(lineCount * testRandomSelectionPct / 100.0f);
    }
    log.info("{} test split size is {} based on random selection percentage {}",
        new Object[] { inputFile.getName(), testSplitSize, testRandomSelectionPct });
    long[] ridx = new long[testSplitSize];
    RandomSampler.sample(testSplitSize, lineCount - 1, testSplitSize, 0, ridx, 0, RandomUtils.getRandom());
    randomSel = new BitSet(lineCount);
    for (long idx : ridx) {
      randomSel.set((int) idx + 1);
    }
  } else {
    if (testSplitPct > 0) { // calculate split size based on percentage
      testSplitSize = Math.round(lineCount * testSplitPct / 100.0f);
      log.info("{} test split size is {} based on percentage {}",
          new Object[] { inputFile.getName(), testSplitSize, testSplitPct });
    } else {
      log.info("{} test split size is {}", inputFile.getName(), testSplitSize);
    }

    if (splitLocation > 0) { // calculate start of split based on percentage
      testSplitStart = Math.round(lineCount * splitLocation / 100.0f);
      if (lineCount - testSplitStart < testSplitSize) {
        // adjust split start downwards based on split size.
        testSplitStart = lineCount - testSplitSize;
      }
      log.info("{} test split start is {} based on split location {}",
          new Object[] { inputFile.getName(), testSplitStart, splitLocation });
    }

    if (testSplitStart < 0) {
      throw new IllegalArgumentException(
          "test split size for " + inputFile + " is too large, it would produce an "
              + "empty training set from the initial set of " + lineCount + " examples");
    } else if (lineCount - testSplitSize < testSplitSize) {
      log.warn("Test set size for {} may be too large, {} is larger than the number of "
          + "lines remaining in the training set: {}",
          new Object[] { inputFile, testSplitSize, lineCount - testSplitSize });
    }
  }

  BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputFile), charset));
  Writer trainingWriter = new OutputStreamWriter(fs.create(trainingOutputFile), charset);
  Writer testWriter = new OutputStreamWriter(fs.create(testOutputFile), charset);

  int trainCount = 0;
  int testCount = 0;
  try {
    String line;
    int pos = 0;
    while ((line = reader.readLine()) != null) {
      pos++;

      Writer writer;
      if (testRandomSelectionPct > 0) { // Randomly choose
        writer = randomSel.get(pos) ? testWriter : trainingWriter;
      } else { // Choose based on location
        writer = pos > testSplitStart ? testWriter : trainingWriter;
      }

      if (writer == testWriter) {
        if (testCount >= testSplitSize) {
          writer = trainingWriter;
        } else {
          testCount++;
        }
      }
      if (writer == trainingWriter) {
        trainCount++;
      }
      writer.write(line);
      writer.write('\n');
    }
  } finally {
    IOUtils.quietClose(reader);
    IOUtils.quietClose(trainingWriter);
    IOUtils.quietClose(testWriter);
  }

  log.info("file: {}, input: {} train: {}, test: {} starting at {}",
      new Object[] { inputFile.getName(), lineCount, trainCount, testCount, testSplitStart });

  // testing;
  if (callback != null) {
    callback.splitComplete(inputFile, lineCount, trainCount, testCount, testSplitStart);
  }
}
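Both split utilities above use the same random-selection idiom: the sampled line numbers are marked in a BitSet up front, and randomSel.get(pos) then decides, while streaming the file, whether a line goes to the test set. A simplified sketch of that idiom with hypothetical names, using java.util.Random in place of Mahout's RandomSampler:

import java.util.BitSet;
import java.util.Random;

class RandomSplitSketch {
  // Mark sampleSize distinct 1-based line numbers as "test" lines.
  static BitSet sampleLines(int lineCount, int sampleSize, long seed) {
    BitSet selected = new BitSet(lineCount + 1);
    Random rnd = new Random(seed);
    while (selected.cardinality() < Math.min(sampleSize, lineCount)) {
      selected.set(rnd.nextInt(lineCount) + 1);
    }
    return selected;
  }

  // While reading the file, consult get(lineNumber) to route each line.
  static boolean goesToTestSet(BitSet selected, int lineNumber) {
    return selected.get(lineNumber);
  }
}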