Example usage for java.lang CharSequence length

List of usage examples for java.lang.CharSequence.length()

Introduction

On this page you can find example usages of java.lang.CharSequence.length().

Prototype

int length();

Source Link

Documentation

Returns the length of this character sequence.

Usage

From source file:it.unimi.dsi.util.ImmutableExternalPrefixMap.java

/** Creates an external prefix map with the specified block size and dump stream.
 *
 * <P>The constructor iterates over <code>terms</code> twice: a first pass gathers
 * character frequencies and checks ordering, a second pass writes the terms to the
 * dump stream, front-coded against the previous term and split into fixed-size blocks.
 *
 * <P>This constructor does not assume that strings returned by <code>terms.iterator()</code>
 * will be distinct. Thus, it can be safely used with {@link FileLinesCollection}.
 *
 * <P>NOTE(review): the sentence above disagrees with the code, which throws an
 * {@link IllegalArgumentException} as soon as a term is not strictly greater than
 * the previous one (i.e., also on duplicates) -- confirm which one is intended.
 * 
 * @param terms an iterable whose iterator will enumerate in lexicographical order the terms for the map.
 * @param blockSizeInBytes the block size (in bytes).
 * @param dumpStreamFilename the name of the dump stream, or <code>null</code> for a map
 * with an automatic dump stream.
 * @throws IOException if creating, writing or reopening the dump stream fails.
 * @throws IllegalArgumentException if a term is not strictly greater than the term preceding it.
 */

public ImmutableExternalPrefixMap(final Iterable<? extends CharSequence> terms, final int blockSizeInBytes,
        final CharSequence dumpStreamFilename) throws IOException {
    // The block size is kept in bits, as all positions below are bit positions.
    this.blockSize = blockSizeInBytes * 8;
    // Without an explicit file name the map manages its own temporary dump file.
    this.selfContained = dumpStreamFilename == null;
    // First of all, we gather frequencies for all Unicode characters
    int[] frequency = new int[Character.MAX_VALUE + 1];
    // NOTE(review): maxWordLength is computed but never read again in this constructor.
    int maxWordLength = 0;
    CharSequence s;
    int count = 0;

    final MutableString prevTerm = new MutableString();

    // First pass: count terms, accumulate per-character frequencies, validate ordering.
    for (Iterator<? extends CharSequence> i = terms.iterator(); i.hasNext();) {
        s = i.next();
        maxWordLength = Math.max(s.length(), maxWordLength);
        for (int j = s.length(); j-- != 0;)
            frequency[s.charAt(j)]++;
        // Each term must be strictly greater than its predecessor.
        if (count > 0 && prevTerm.compareTo(s) >= 0)
            throw new IllegalArgumentException(
                    "The provided term collection is not sorted, or contains duplicates [" + prevTerm + ", " + s
                            + "]");
        count++;
        prevTerm.replace(s);
    }

    size = count;

    // Then, we compute the number of actually used characters
    count = 0;
    for (int i = frequency.length; i-- != 0;)
        if (frequency[i] != 0)
            count++;

    /* Now we remap used characters in f, building at the same time maps from 
     * symbol to characters and from characters to symbols. */

    int[] packedFrequency = new int[count];
    symbol2char = new char[count];
    char2symbol = new Char2IntOpenHashMap(count);
    // Characters that never appear in the terms map to -1.
    char2symbol.defaultReturnValue(-1);

    // Both i and k run downwards, so symbols are numbered in increasing character order.
    for (int i = frequency.length, k = count; i-- != 0;) {
        if (frequency[i] != 0) {
            packedFrequency[--k] = frequency[i];
            symbol2char[k] = (char) i;
            char2symbol.put((char) i, k);
        }
    }

    char2symbol.trim();

    // We now build the coder used to code the strings

    final PrefixCoder prefixCoder;
    final PrefixCodec codec;
    final BitVector[] codeWord;

    if (packedFrequency.length != 0) {
        codec = new HuTuckerCodec(packedFrequency);
        prefixCoder = codec.coder();
        decoder = codec.decoder();
        codeWord = prefixCoder.codeWords();
    } else {
        // This handles the case of a collection without words
        // (codeWord stays null; with no terms the encoding loop below never dereferences it).
        codec = null;
        prefixCoder = null;
        decoder = null;
        codeWord = null;
    }

    // Drop the references to the (large) frequency tables, which are no longer needed.
    packedFrequency = frequency = null;

    // We now compress all strings using the given codec mixed with front coding
    final OutputBitStream output;
    if (selfContained) {
        // Automatic dump stream: a temporary file that is removed when the JVM exits.
        final File temp = File.createTempFile(this.getClass().getName(), ".dump");
        temp.deleteOnExit();
        tempDumpStreamFilename = temp.toString();
        output = new OutputBitStream(temp, blockSizeInBytes);
    } else
        output = new OutputBitStream(tempDumpStreamFilename = dumpStreamFilename.toString(), blockSizeInBytes);

    // Whether the current term is a delimiter, i.e., the first term of a block.
    boolean isDelimiter;

    int length, prevTermLength = 0, bits;
    int prefixLength = 0, termCount = 0;
    // NOTE(review): currBuffer is toggled at each iteration but never read in this constructor.
    int currBuffer = 0;

    // For each block: index of its first term, its block offset in the dump stream, and its first term.
    final IntArrayList blockStarts = new IntArrayList();
    final IntArrayList blockOffsets = new IntArrayList();
    final ObjectArrayList<MutableString> delimiters = new ObjectArrayList<MutableString>();
    prevTerm.length(0);

    // Second pass: write every term to the dump stream.
    for (Iterator<?> i = terms.iterator(); i.hasNext();) {
        s = (CharSequence) i.next();
        length = s.length();

        isDelimiter = false;

        // We compute the common prefix and the number of bits that are necessary to code the next term.
        bits = 0;
        for (prefixLength = 0; prefixLength < length && prefixLength < prevTermLength
                && prevTerm.charAt(prefixLength) == s.charAt(prefixLength); prefixLength++)
            ;
        for (int j = prefixLength; j < length; j++)
            bits += codeWord[char2symbol.get(s.charAt(j))].size();

        //if ( bits + length + 1 > blockSize ) throw new IllegalArgumentException( "The string \"" + s + "\" is too long to be encoded with block size " + blockSizeInBytes );

        // If the next term would overflow the block, and we are not at the start of a block, we align.
        // The estimate adds the coded suffix bits to (length - prefixLength + 1) and (prefixLength + 1),
        // presumably the sizes of the two unary codes written below -- TODO confirm.
        if (output.writtenBits() % blockSize != 0 && output.writtenBits() / blockSize != (output.writtenBits()
                + (length - prefixLength + 1) + (prefixLength + 1) + bits - 1) / blockSize) {
            // We align by writing 0es.
            if (DEBUG)
                System.err.println(
                        "Aligning away " + (blockSize - output.writtenBits() % blockSize) + " bits...");
            for (int j = (int) (blockSize - output.writtenBits() % blockSize); j-- != 0;)
                output.writeBit(0);
            if (ASSERTS)
                assert output.writtenBits() % blockSize == 0;
        }

        // A term that starts exactly on a block boundary is a delimiter and is written in full.
        if (output.writtenBits() % blockSize == 0) {
            isDelimiter = true;
            prefixLength = 0;
            blockOffsets.add((int) (output.writtenBits() / blockSize));
        }

        // Note that delimiters do not get the prefix length, as it's 0.
        if (!isDelimiter)
            output.writeUnary(prefixLength);
        // Length of the suffix that follows the shared prefix.
        output.writeUnary(length - prefixLength);

        // Write the next coded suffix on output.
        for (int j = prefixLength; j < length; j++) {
            BitVector c = codeWord[char2symbol.get(s.charAt(j))];
            for (int k = 0; k < c.size(); k++)
                output.writeBit(c.getBoolean(k));
        }

        if (isDelimiter) {
            if (DEBUG)
                System.err.println(
                        "First string of block " + blockStarts.size() + ": " + termCount + " (" + s + ")");
            // The current word starts a new block
            blockStarts.add(termCount);
            // We do not want to rely on s being immutable.
            delimiters.add(new MutableString(s));
        }

        currBuffer = 1 - currBuffer;
        // Remember this term: the next one is front-coded against it.
        prevTerm.replace(s);
        prevTermLength = length;
        termCount++;
    }

    // After alignment the number of written bits is converted to a length in bytes.
    output.align();
    dumpStreamLength = output.writtenBits() / 8;
    output.close();

    // No approximator is built for an empty collection (prefixCoder is null in that case).
    intervalApproximator = prefixCoder == null ? null
            : new ImmutableBinaryTrie<CharSequence>(delimiters,
                    new PrefixCoderTransformationStrategy(prefixCoder, char2symbol, false));

    // Extra final entry equal to the number of terms, closing the last block.
    blockStarts.add(size);
    blockStart = blockStarts.toIntArray();
    blockOffset = blockOffsets.toIntArray();

    // We use a buffer of the same size of a block, hoping in fast I/O.
    dumpStream = new InputBitStream(tempDumpStreamFilename, blockSizeInBytes);
}

From source file:com.snt.bt.recon.activities.MainActivity.java

/**
 * Appends {@code data} to the debug text view and, when the view then holds more
 * than {@code MAX_LINE} lines, removes the oldest lines from the top.
 *
 * @param data the text to append
 */
public void writeTextFile(String data) {
    mDebugText.append(data);

    // Nothing to trim while the view is within its line budget.
    final int surplusLines = mDebugText.getLineCount() - MAX_LINE;
    if (surplusLines <= 0) {
        return;
    }

    // Find the newline that terminates the last surplus line.
    final CharSequence content = mDebugText.getText();
    int cutIndex = -1;
    for (int skipped = 0; skipped < surplusLines; skipped++) {
        cutIndex++;
        while (cutIndex < content.length() && content.charAt(cutIndex) != '\n') {
            cutIndex++;
        }
    }

    if (cutIndex >= content.length()) {
        // Ran past the end of the text: there is nothing worth keeping.
        mDebugText.setText("");
    } else {
        // Drop everything up to and including that newline.
        mDebugText.getEditableText().delete(0, cutIndex + 1);
    }
}

From source file:org.cryptomator.crypto.aes256.Aes256Cryptor.java

/**
 * Derives a secret key from a password using scrypt.
 *
 * <p>The password is encoded as UTF-8 directly from the {@link CharSequence}, so the
 * derived key does not depend on the platform default charset and no immutable
 * {@link String} copy of the password is created. All temporary byte copies of the
 * password are zeroed before this method returns.
 *
 * <p>NOTE(review): the previous implementation used the platform default charset.
 * Keys derived from non-ASCII passwords on a platform whose default charset is not
 * UTF-8 will differ from keys derived by this version -- confirm that no persisted
 * data depends on the old behavior.
 *
 * @param password the plaintext password; it is only read, never modified
 * @param salt the salt passed to scrypt
 * @param costParam the scrypt cost parameter
 * @param blockSize the scrypt block size parameter
 * @param keyLengthInBits the desired key length in bits
 * @return the derived key, tagged with {@code AES_KEY_ALGORITHM}
 */
private SecretKey scrypt(CharSequence password, byte[] salt, int costParam, int blockSize,
        int keyLengthInBits) {
    // Encode straight from the CharSequence: this avoids password.toString() (whose
    // implementation is unknown) as well as any String copy that could not be wiped.
    final java.nio.ByteBuffer encoded = java.nio.charset.StandardCharsets.UTF_8
            .encode(java.nio.CharBuffer.wrap(password));
    final byte[] pw = new byte[encoded.remaining()];
    encoded.get(pw);
    try {
        final byte[] key = SCrypt.generate(pw, salt, costParam, blockSize, 1, keyLengthInBits / Byte.SIZE);
        return new SecretKeySpec(key, AES_KEY_ALGORITHM);
    } finally {
        // destroy copied bytes of the plaintext password:
        Arrays.fill(pw, (byte) 0);
        if (encoded.hasArray()) {
            // The encoder's backing array holds a second copy of the password bytes.
            Arrays.fill(encoded.array(), (byte) 0);
        }
    }
}

From source file:br.msf.commons.text.EnhancedStringBuilder.java

/**
 * Tells whether the wrapped character data ends with the given suffix.
 *
 * @param suffix the sequence expected at the end
 * @return the result of matching {@code suffix} at the offset where it would have to begin
 */
public boolean endsWith(final CharSequence suffix) {
    // Offset at which the suffix has to start in order to finish exactly at the end.
    final int suffixOffset = delegate.length() - suffix.length();
    return startsWith(suffix, suffixOffset, delegate);
}

From source file:br.msf.commons.util.CharSequenceUtils.java

/**
 * Splits a sequence around the occurrences of a pattern.
 *
 * @param sequence the sequence to split
 * @param pattern the separator pattern; when {@code null} the whole sequence is returned as a single element
 * @param ignoreBlank whether blank pieces are left out of the result
 * @return the pieces in order of appearance; an empty list when {@code sequence} is blank or {@code null}
 */
public static List<String> split(final CharSequence sequence, final Pattern pattern,
        final boolean ignoreBlank) {
    if (isBlankOrNull(sequence)) {
        return CollectionUtils.EMPTY_LIST;
    }
    if (pattern == null) {
        return Collections.singletonList(sequence.toString());
    }

    final Collection<MatchEntry> matches = findPattern(pattern, sequence);
    final List<String> pieces = new ArrayList<String>(matches.size() + 1);

    // Walk the matches, emitting the text that lies between consecutive separators.
    int cursor = 0;
    for (MatchEntry match : matches) {
        final CharSequence piece = sequence.subSequence(cursor, match.getStart());
        cursor = match.getEnd();
        if (!(CharSequenceUtils.isBlankOrNull(piece) && ignoreBlank)) {
            pieces.add(piece.toString());
        }
    }

    // Whatever follows the last separator is the final piece.
    final CharSequence tail = sequence.subSequence(cursor, sequence.length());
    if (CharSequenceUtils.isNotBlank(tail) || !ignoreBlank) {
        pieces.add(tail.toString());
    }
    return pieces;
}

From source file:org.archive.modules.extractor.ExtractorHTML.java

/**
 * Process style text./*from w  w  w  .  j ava  2s .co m*/
 * @param curi CrawlURI we're processing.
 * @param sequence Sequence from underlying ReplayCharSequence. This
 * is TRANSIENT data. Make a copy if you want the data to live outside
 * of this extractors' lifetime.
 * @param endOfOpenTag
 */
protected void processStyle(CrawlURI curi, CharSequence sequence, int endOfOpenTag) {
    // First, get attributes of script-open tag as per any other tag.
    processGeneralTag(curi, sequence.subSequence(0, 6), sequence.subSequence(0, endOfOpenTag));

    // then, parse for URIs
    numberOfLinksExtracted.addAndGet(
            ExtractorCSS.processStyleCode(this, curi, sequence.subSequence(endOfOpenTag, sequence.length())));
}

From source file:br.msf.commons.text.EnhancedStringBuilder.java

/**
 * Returns the text that follows the first occurrence of {@code toSearch}.
 *
 * @param toSearch the sequence to look for
 * @param caseSensitive passed through to {@code indexOf}
 * @return the text after the first occurrence, or {@code null} when {@code indexOf} reports a negative index
 */
public String substringAfterFirst(final CharSequence toSearch, final Boolean caseSensitive) {
    final int matchStart = indexOf(toSearch, caseSensitive);
    if (matchStart < 0) {
        return null;
    }
    // Skip over the match itself and keep everything up to the end.
    final int from = matchStart + toSearch.length();
    return substring(from, length());
}

From source file:br.msf.commons.text.EnhancedStringBuilder.java

/**
 * Returns the text that follows the last occurrence of {@code toSearch}.
 *
 * @param toSearch the sequence to look for
 * @param caseSensitive passed through to {@code lastIndexOf}
 * @return the text after the last occurrence, or {@code null} when {@code lastIndexOf} reports a negative index
 */
public String substringAfterLast(final CharSequence toSearch, final Boolean caseSensitive) {
    final int matchStart = lastIndexOf(toSearch, caseSensitive);
    if (matchStart < 0) {
        return null;
    }
    // Skip over the match itself and keep everything up to the end.
    final int from = matchStart + toSearch.length();
    return substring(from, length());
}

From source file:com.google.dart.tools.ui.internal.text.dart.DartAutoIndentStrategy_NEW.java

/**
 * Returns the visual length of a given <code>CharSequence</code> taking into account the visual
 * tabulator length./*  w  w w  .j av  a  2  s.com*/
 * 
 * @param seq the string to measure
 * @param tabLength the length of a tab
 * @return the visual length of <code>seq</code>
 */
private int computeVisualLength(CharSequence seq, int tabLength) {

    int size = 0;

    if (seq != null) {
        for (int i = 0; i < seq.length(); i++) {
            char ch = seq.charAt(i);
            if (ch == '\t') {
                if (tabLength != 0) {
                    size += tabLength - size % tabLength;
                    // else: size stays the same
                }
            } else {
                size++;
            }
        }
    }

    return size;
}