Example usage for java.lang Character isSpaceChar

List of usage examples for java.lang Character isSpaceChar

Introduction

On this page you can find example usages of java.lang.Character.isSpaceChar.

Prototype

public static boolean isSpaceChar(int codePoint) 

Source Link

Document

Determines if the specified character (Unicode code point) is a Unicode space character.

Usage

From source file:com.google.gwt.jolokia.server.servlet.ProxyServlet.java

/**
 * Encodes characters in the query or fragment part of the URI.
 *
 * <p>
 * Unfortunately, an incoming URI sometimes has characters disallowed by the
 * spec. HttpClient insists that the outgoing proxied request has a valid
 * URI because it uses Java's {@link URI}. To be more forgiving, we must
 * escape the problematic characters. See the URI class for the spec.
 *
 * <p>
 * ASCII characters are escaped unless they are flagged in
 * {@code asciiQueryChars}. Non-ASCII characters are escaped only when they
 * are ISO control characters or Unicode space characters. Every escaped
 * character is written as the percent-encoded octets of its UTF-8 encoding
 * (for example U+2028 becomes {@code %E2%80%A8}), so that each escape is
 * exactly a percent sign followed by two hex digits.
 *
 * @param in
 *            example: name=value&foo=bar#fragment
 * @return {@code in} itself when nothing had to be escaped, otherwise a new
 *         buffer containing the escaped text
 */
protected static CharSequence encodeUriQuery(CharSequence in) {
    // Note that I can't simply use URI.java to encode because it will
    // escape pre-existing escaped things.
    StringBuilder outBuf = null; // allocated lazily, on the first escape
    Formatter formatter = null;
    for (int i = 0; i < in.length(); i++) {
        char c = in.charAt(i);
        boolean escape;
        if (c < 128) {
            escape = !asciiQueryChars.get((int) c);
        } else {
            // not-ascii: only control and space characters are escaped
            escape = Character.isISOControl(c) || Character.isSpaceChar(c);
        }
        if (!escape) {
            if (outBuf != null) {
                outBuf.append(c);
            }
            continue;
        }
        if (outBuf == null) {
            outBuf = new StringBuilder(in.length() + 5 * 3);
            // everything before position i was left untouched
            outBuf.append(in, 0, i);
            formatter = new Formatter(outBuf);
        }
        // Formatting the UTF-16 code unit directly would yield e.g. "%2028"
        // for U+2028, which a decoder reads as "%20" followed by "28".
        // Emit one "%XX" triplet per UTF-8 octet instead. Escaped characters
        // are never surrogates, so encoding a single char is well defined.
        for (byte octet : String.valueOf(c).getBytes(java.nio.charset.StandardCharsets.UTF_8)) {
            // leading %, 0 padded, width 2, capital hex
            formatter.format("%%%02X", octet & 0xFF);
        }
    }
    return outBuf != null ? outBuf : in;
}

From source file:au.org.ala.delta.util.Utils.java

/**
 * The main job of this method is to terminate RTF control words with {}
 * instead of a space.
 *
 * <p>
 * Along the way it also:
 * <ul>
 * <li>converts every tab character into a {@code \tab{}} control word;</li>
 * <li>replaces a quoted character {@code \'xx} whose hex value is above 127
 * with the character itself, unless it follows a {@code \\u} control
 * word;</li>
 * <li>when {@code quoteDelims} is set, inserts {@code {}} in front of a
 * {@code *} or {@code #} that starts the text or follows a space
 * character;</li>
 * <li>appends {@code {}} if the text ends in the middle of a control
 * word.</li>
 * </ul>
 * Text of the form {@code <...>} has its closing bracket removed while
 * processing and re-appended at the end.
 *
 * @param text
 *            the RTF text to process
 * @param quoteDelims
 *            whether {@code *} and {@code #} delimiters should be guarded
 *            with {@code {}}
 * @return the processed text, or the empty string when
 *         {@code StringUtils.isEmpty(text)} is true
 */
// Not all cases are handled correctly in the current code.
// For example, text with \bin might not always give correct results
// A few other things, such as \'xx, should perhaps also be given
// explicit treatment, but should not substantially affect the outcome.
public static String despaceRtf(String text, boolean quoteDelims) {
    if (StringUtils.isEmpty(text)) {
        return "";
    }
    int srcPos;
    boolean inRTF = false;
    boolean inParam = false;
    boolean inUnicode = false;
    boolean bracketed = text.charAt(0) == '<' && text.charAt(text.length() - 1) == '>';

    StringBuilder outputText = new StringBuilder(text);
    if (bracketed) // If a "comment", temporarily chop off the terminating
                   // bracket
        outputText.setLength(outputText.length() - 1);
    for (srcPos = 0; srcPos < outputText.length(); ++srcPos) {
        char ch = outputText.charAt(srcPos);
        // Always convert a tab character into a \tab control word
        if (ch == '\t') {
            outputText.replace(srcPos, srcPos + 1, "\\tab{}");
            ch = '\\';
        }
        if (inRTF) {
            if (Character.isDigit(ch) || (!inParam && ch == '-')) {
                // First character of a numeric parameter: remember whether
                // the control word is \\u (a Unicode escape).
                if (!inParam && outputText.charAt(srcPos - 1) == 'u' && outputText.charAt(srcPos - 2) == '\\')
                    inUnicode = true;
                inParam = true;
            } else if (inParam || !Character.isLetter(ch)) {
                // The control word (and its parameter, if any) ends here.
                boolean wasInUnicode = inUnicode;
                inUnicode = inParam = inRTF = false;
                // NOTE(review): Character.isSpaceChar is false for '\n' and
                // '\r', so the newline handling below cannot be reached as
                // written. Character.isWhitespace may have been intended;
                // confirm before changing.
                if (Character.isSpaceChar(ch)) {
                    // Check for the absence of a control; when this
                    // happens,
                    // the terminating character IS the control word!
                    if (srcPos > 0 && outputText.charAt(srcPos - 1) == '\\') {
                        // \<NEWLINE> is treated as a \par control. We make
                        // this
                        // change here explicitly, to make it more apparent.
                        // But should we keep the <NEWLINE> character around
                        // as well,
                        // as a clue for breaking lines during output?
                        if (ch == '\n' || ch == '\r') {
                            // text.replace(--srcPos, 2, "\\par{}");
                            outputText.insert(srcPos, "par{}");
                            srcPos += 5;
                        }
                        // (Note that if we don't catch this here, replacing
                        // "\ " could yield
                        // "\{}" which is WRONG. But rather than just get
                        // rid of this, it
                        // is probably better to replace with {} to ensure
                        // that any preceding
                        // RTF is terminated)
                        else if (ch == ' ') {
                            // StringBuilder.replace takes (start, end), not
                            // (start, length): replace exactly the two
                            // characters "\ " at srcPos - 1 and srcPos.
                            outputText.replace(srcPos - 1, srcPos + 1, "{}");
                        }
                    }
                    // This is the chief condition we are trying to fix.
                    // Terminate the RTF
                    // control phrase with {} instead of white space...
                    // But if the terminator is a new line, we keep it
                    // around
                    // for assistance in wrapping output lines.
                    // else if (ch == '\n')
                    // {
                    // text.insert(srcPos, "{}");
                    // srcPos += 2;
                    // }
                    else if (ch != '\n') {
                        outputText.setCharAt(srcPos, '{');
                        outputText.insert(++srcPos, '}');
                    }
                }
                // No reason to do the following. Probably better to leave
                // the
                // character quoted.
                // Reinstated 8 December 1999 because we need to be sure
                // all text is in a consistent state when linking characters
                // One exception - if the quoted character is a Unicode
                // "replacement"
                // character, we'd better leave it quoted.
                else if (ch == '\'' && !wasInUnicode && srcPos + 2 < outputText.length()) {
                    char[] buff = new char[3];
                    buff[0] = outputText.charAt(srcPos + 1);
                    buff[1] = outputText.charAt(srcPos + 2);
                    buff[2] = 0;

                    int[] endPos = new int[1];
                    int value = strtol(new String(buff), endPos, 16);
                    if ((endPos[0] == 2) && value > 127 && outputText.charAt(srcPos - 1) == '\\') {

                        // Replace the four characters \'xx with the single
                        // character they denote.
                        srcPos--;
                        outputText.replace(srcPos, srcPos + 4, new String(new char[] { (char) value }));

                    }
                } else if (ch == '\\' && outputText.charAt(srcPos - 1) != '\\') // Terminates
                                                                                // RTF,
                                                                                // but
                                                                                // starts
                                                                                // new
                                                                                // RTF
                {
                    inRTF = true;
                    if (wasInUnicode && srcPos + 1 < outputText.length()
                            && outputText.charAt(srcPos + 1) == '\'')
                        inUnicode = true;
                } else if (ch == '>') {
                    // Append a space after the RTF (it was probably
                    // stripped by the attribute parsing)
                    outputText.insert(srcPos, "{}");
                }
            }
        } else if (ch == '\\')
            inRTF = true;
        // TEST - to allow outputting of a "*" or "#" character in arbitrary
        // text...
        else if (quoteDelims && (ch == '*' || ch == '#')
                && (srcPos == 0 || Character.isSpaceChar(outputText.charAt(srcPos - 1)))) {
            // //char buffer[5];
            // Always build a 4-character replacement string, like:
            // \'20
            // //sprintf(buffer, "\\\'%2.2x", (int)ch);
            // //text.replace(srcPos, buffer, 4);
            // //srcPos += 3;
            outputText.insert(srcPos, "{}");
            srcPos += 2;
        }
    }
    // The text ended inside a control word: terminate it.
    if (inRTF)
        outputText.append("{}");
    if (bracketed)
        outputText.append('>');
    return outputText.toString();
}

From source file:com.fuseim.webapp.ProxyServlet.java

/**
 * Encodes characters in the query or fragment part of the URI.
 *
 * <p>Unfortunately, an incoming URI sometimes has characters disallowed by the spec. HttpClient
 * insists that the outgoing proxied request has a valid URI because it uses Java's {@link URI}.
 * To be more forgiving, we must escape the problematic characters. See the URI class for the
 * spec./*from w w  w  .  ja va2  s  .co m*/
 *
 * @param in example: name=value&amp;foo=bar#fragment
 */
protected static CharSequence encodeUriQuery(CharSequence in) {
    //Note that I can't simply use URI.java to encode because it will escape pre-existing escaped things.
    StringBuilder outBuf = null;
    Formatter formatter = null;
    for (int i = 0; i < in.length(); i++) {
        char c = in.charAt(i);
        boolean escape = true;
        if (c < 128) {
            if (asciiQueryChars.get((int) c)) {
                escape = false;
            }
        } else if (!Character.isISOControl(c) && !Character.isSpaceChar(c)) { //not-ascii
            escape = false;
        }
        if (!escape) {
            if (outBuf != null)
                outBuf.append(c);
        } else {
            //escape
            if (outBuf == null) {
                outBuf = new StringBuilder(in.length() + 5 * 3);
                outBuf.append(in, 0, i);
                formatter = new Formatter(outBuf);
            }
            //leading %, 0 padded, width 2, capital hex
            formatter.format("%%%02X", (int) c); //TODO
        }
    }
    return outBuf != null ? outBuf : in;
}

From source file:org.openstreetmap.josm.tools.Utils.java

/**
 * Decides whether a character counts as one to strip.
 *
 * @param c the character to examine
 * @param skipChars extra characters, passed on to {@code stripChar}
 * @return {@code true} if {@code c} is Java whitespace, a Unicode space character, or
 *         {@code stripChar(skipChars, c)} returns {@code true}
 */
private static boolean isStrippedChar(char c, final char... skipChars) {
    if (Character.isWhitespace(c)) {
        return true;
    }
    if (Character.isSpaceChar(c)) {
        return true;
    }
    // Only consulted when neither built-in check matched.
    return stripChar(skipChars, c);
}

From source file:plugin.notes.gui.NotesView.java

/**
 * Adjusts the editor when the caret sits inside an HTML list (presumably in response to the
 * Enter key, going by the method name).
 *
 * <p>Nothing happens unless the paragraph at the caret has a {@code UL} or {@code OL} parent.
 * Otherwise the enclosing list item is inspected:
 * <ul>
 * <li>if it is empty or holds only whitespace, {@code manageListElement} is called;</li>
 * <li>if the caret is just before the end of the item, an empty list element is inserted and
 * the caret is moved back by the number of space characters that directly follow the last
 * letter or digit of the item;</li>
 * <li>otherwise the text from the caret to the end of the item is cut and re-inserted as a new
 * list element, and the caret is placed at the end of that element.</li>
 * </ul>
 * A {@link BadLocationException} is logged and otherwise ignored.
 */
private void handleEnter() {
    // TODO: clean this method up
    Element elem;
    int pos = editor.getCaretPosition();
    ExtendedHTMLDocument htmlDoc = (ExtendedHTMLDocument) editor.getStyledDocument();

    try {
        if (ExtendedHTMLEditorKit.checkParentsTag(htmlDoc.getParagraphElement(editor.getCaretPosition()),
                HTML.Tag.UL)
                || ExtendedHTMLEditorKit.checkParentsTag(htmlDoc.getParagraphElement(editor.getCaretPosition()),
                        HTML.Tag.OL)) {
            elem = ExtendedHTMLEditorKit
                    .getListItemParent(htmlDoc.getCharacterElement(editor.getCaretPosition()));

            int so = elem.getStartOffset();
            int eo = elem.getEndOffset();
            char[] temp = editor.getText(so, eo - so).toCharArray();

            // Does the list item contain anything besides whitespace?
            boolean content = false;
            for (char aTemp : temp) {
                if (!Character.isWhitespace(aTemp)) {
                    content = true;
                    break;
                }
            }

            // Number of space characters directly after the last letter/digit.
            int repos = 0;
            if (content) {
                // Index of the last letter or digit. It stays -1 when the item has
                // content but no letter or digit (e.g. only punctuation); the bounded
                // loop keeps that case from reading temp[-1].
                int end = -1;
                for (int j = temp.length - 1; j >= 0; j--) {
                    if (Character.isLetterOrDigit(temp[j])) {
                        end = j;
                        break;
                    }
                }

                // Count the space characters up to the next non-space character. If
                // none follows (or no letter/digit was found) the offset stays 0,
                // instead of running past the end of the array.
                if (end >= 0) {
                    for (int j = end + 1; j < temp.length; j++) {
                        if (!Character.isSpaceChar(temp[j])) {
                            repos = j - end - 1;
                            break;
                        }
                    }
                }
            }

            if ((elem.getStartOffset() == elem.getEndOffset()) || !content) {
                manageListElement(htmlDoc);
            } else {
                if ((editor.getCaretPosition() + 1) == elem.getEndOffset()) {
                    ExtendedHTMLEditorKit.insertListElement(editor, "");
                    editor.setCaretPosition(pos - repos);
                } else {
                    // Move the remainder of the item into a new list element.
                    int caret = editor.getCaretPosition();
                    String tempString = editor.getText(caret, eo - caret);
                    editor.select(caret, eo - 1);
                    editor.replaceSelection("");
                    ExtendedHTMLEditorKit.insertListElement(editor, tempString);

                    Element newLi = ExtendedHTMLEditorKit
                            .getListItemParent(htmlDoc.getCharacterElement(editor.getCaretPosition()));
                    editor.setCaretPosition(newLi.getEndOffset());
                }
            }
        }
    } catch (BadLocationException ble) {
        Logging.errorPrint(ble.getMessage(), ble);
    }
}

From source file:org.jivesoftware.community.util.StringUtils.java

/**
 * Checks that a string consists only of "web safe" characters.
 *
 * <p>A character is accepted when it is a letter, a digit, a Unicode space character, or
 * occurs in {@code otherSafeCharacters}.
 *
 * @param str the string to test; must not be {@code null}
 * @param otherSafeCharacters additional characters to accept
 * @return {@code true} if every character of {@code str} is accepted, {@code false} as soon as
 *         one is not
 * @throws NullPointerException if {@code str} is {@code null}
 */
public static boolean isWebSafeString(String str, String otherSafeCharacters) {
    if (str == null) {
        throw new NullPointerException("String to be tested cannot be null");
    }
    for (int idx = 0, count = str.length(); idx < count; idx++) {
        final char current = str.charAt(idx);
        final boolean alwaysSafe = Character.isLetterOrDigit(current) || Character.isSpaceChar(current);
        // The extra-characters list is consulted only for characters that are not
        // inherently safe.
        if (!alwaysSafe && otherSafeCharacters.indexOf(current) < 0) {
            return false;
        }
    }
    return true;
}

From source file:com.insprise.common.lang.StringUtilities.java

/**
  * Remove all spaces in a word/* w  ww .  ja  v  a  2s.  co m*/
  * @param word
  * @return parsed word
  */
 public static String removeWordSpace(String word) {
     StringBuilder sb = new StringBuilder("");
     int len = word.length();
     for (int i = 0; i < len; i++) {
         Character c = word.charAt(i);
         if (!Character.isSpaceChar(c)) {
             sb.append(c);
         }
     }
     return sb.toString();
 }

From source file:net.sf.firemox.tools.MToolKit.java

/**
 * Return the specified string without any local white spaces. Would be
 * replaced when/*from  w  w w  .ja v a  2  s  . c o  m*/
 * {@link org.apache.commons.lang.StringUtils#deleteWhitespace(String)} would
 * work.
 * 
 * @param string
 *          the string to normalize.
 * @return the given string without any space char.
 * @see Character#isWhitespace(char)
 */
public static String replaceWhiteSpaces(String string) {
    final StringBuilder workingString = new StringBuilder(StringUtils.deleteWhitespace(string));
    for (int count = workingString.length(); count-- > 0;) {
        if (Character.isSpaceChar(workingString.charAt(count))) {
            // delete this char
            workingString.deleteCharAt(count);
        }
    }
    return workingString.toString();
}

From source file:com.guzzservices.manager.impl.ip.CZUtil.java

/**
 * ???/*from   ww w .  j  ava 2  s. c o m*/
 * 
 * @param s
 *              
 * @return
 *              ?
 */
public static String filterUnprintableCharacter(String s) {
    sb.delete(0, sb.length());
    sb.append(s);

    // 
    for (; sb.length() > 0;) {
        char c = sb.charAt(0);
        if (shouldFilterred(c))
            sb.deleteCharAt(0);
        else
            break;
    }

    // 
    for (; sb.length() > 0;) {
        char c = sb.charAt(sb.length() - 1);
        if (shouldFilterred(c))
            sb.deleteCharAt(sb.length() - 1);
        else
            break;
    }

    // 
    int len = sb.length();
    for (int i = len - 1; i >= 0; i--) {
        char c = sb.charAt(i);
        if (shouldFilterred(c) && !Character.isSpaceChar(c))
            sb.deleteCharAt(i);
    }
    return sb.toString();
}

From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java

/**
 * processMeta - handle an individual field
 *///from  w ww . j  a  v  a  2 s.co m
private void processMeta(DocumentPojo f, metaField m, String text, SourcePojo source,
        UnstructuredAnalysisConfigPojo uap) {

    boolean bAllowDuplicates = false;
    if ((null != m.flags) && m.flags.contains("U")) {
        bAllowDuplicates = true;
    }
    if ((null == m.scriptlang) || m.scriptlang.equalsIgnoreCase("regex")) {

        Pattern metaPattern = createRegex(m.script, m.flags);

        int timesToRun = 1;
        Object[] currField = null;
        if ((null != m.flags) && m.flags.contains("c")) {
            currField = f.getMetadata().get(m.fieldName);
        }
        if (null != currField) { // chained metadata
            timesToRun = currField.length;
            text = (String) currField[0];
        } //TESTED

        Matcher matcher = metaPattern.matcher(text);
        LinkedList<String> Llist = null;

        for (int ii = 0; ii < timesToRun; ++ii) {
            if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above)
                text = (String) currField[ii];
                matcher = metaPattern.matcher(text);
            } //TESTED

            StringBuffer prefix = new StringBuffer(m.fieldName).append(':');
            int nFieldNameLen = m.fieldName.length() + 1;

            try {
                while (matcher.find()) {
                    if (null == Llist) {
                        Llist = new LinkedList<String>();
                    }
                    if (null == m.groupNum) {
                        m.groupNum = 0;
                    }
                    String toAdd = matcher.group(m.groupNum);
                    if (null != m.replace) {
                        toAdd = metaPattern.matcher(toAdd).replaceFirst(m.replace);
                    }
                    if ((null != m.flags) && m.flags.contains("H")) {
                        toAdd = StringEscapeUtils.unescapeHtml(toAdd);
                    }
                    prefix.setLength(nFieldNameLen);
                    prefix.append(toAdd);
                    String dupCheck = prefix.toString();

                    if (!regexDuplicates.contains(dupCheck)) {
                        Llist.add(toAdd);
                        if (!bAllowDuplicates) {
                            regexDuplicates.add(dupCheck);
                        }
                    }
                }
            } catch (Exception e) {
                this._context.getHarvestStatus().logMessage("processMeta1: " + e.getMessage(), true);
            }
        } //(end metadata chaining handling)
        if (null != Llist) {
            if (null != currField) { // (overwrite)
                f.getMetadata().put(m.fieldName, Llist.toArray());
            } else {
                f.addToMetadata(m.fieldName, Llist.toArray());
            }
        } //TESTED
    } else if (m.scriptlang.equalsIgnoreCase("javascript")) {
        if (null == f.getMetadata()) {
            f.setMetadata(new LinkedHashMap<String, Object[]>());
        }
        //set the script engine up if necessary
        if ((null != source) && (null != uap)) {
            //(these are null if called from new processing pipeline vs legacy code)
            intializeScriptEngine(source, uap);
        }

        try {
            //TODO (INF-2488): in new format, this should only happen in between contentMeta blocks/docs
            // (also should be able to use SAH _document object I think?)

            // Javascript: the user passes in 
            Object[] currField = f.getMetadata().get(m.fieldName);
            if ((null == m.flags) || m.flags.isEmpty()) {
                if (null == currField) {
                    engine.put("text", text);
                    engine.put("_iterator", null);
                }
                //(otherwise will just pass the current fields in there)
            } else { // flags specified
                if (m.flags.contains("t")) { // text
                    engine.put("text", text);
                }
                if (m.flags.contains("d")) { // entire document (minus ents and assocs)
                    GsonBuilder gb = new GsonBuilder();
                    Gson g = gb.create();
                    List<EntityPojo> ents = f.getEntities();
                    List<AssociationPojo> assocs = f.getAssociations();
                    try {
                        f.setEntities(null);
                        f.setAssociations(null);
                        engine.put("document", g.toJson(f));
                        securityManager.eval(engine, JavaScriptUtils.initScript);
                    } finally {
                        f.setEntities(ents);
                        f.setAssociations(assocs);
                    }
                }
                if (m.flags.contains("m")) { // metadata
                    GsonBuilder gb = new GsonBuilder();
                    Gson g = gb.create();
                    engine.put("_metadata", g.toJson(f.getMetadata()));
                    securityManager.eval(engine, JavaScriptUtils.iteratorMetaScript);
                }
            } //(end flags processing)

            if (null != currField) {
                f.getMetadata().remove(m.fieldName);

                GsonBuilder gb = new GsonBuilder();
                Gson g = gb.create();
                engine.put("_iterator", g.toJson(currField));
                securityManager.eval(engine, JavaScriptUtils.iteratorDocScript);
            }
            //TESTED (handling of flags, and replacing of existing fields, including when field is null but specified)

            Object returnVal = securityManager.eval(engine, m.script);

            if (null != returnVal) {
                if (returnVal instanceof String) { // The only easy case
                    Object[] array = new Object[1];
                    if ((null != m.flags) && m.flags.contains("H")) {
                        returnVal = StringEscapeUtils.unescapeHtml((String) returnVal);
                    }
                    array[0] = returnVal;
                    f.addToMetadata(m.fieldName, array);
                } else { // complex object or array - in either case the engine turns these into
                         // internal.NativeArray or internal.NativeObject

                    BasicDBList outList = JavaScriptUtils.parseNativeJsObject(returnVal, engine);
                    f.addToMetadata(m.fieldName, outList.toArray());
                }
            }
        } catch (ScriptException e) {

            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(),
                    true);

            // Just do nothing and log
            // e.printStackTrace();
            //DEBUG (don't output log messages per doc)
            //logger.error(e.getMessage());
        } catch (Exception e) {

            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(),
                    true);

            // Just do nothing and log
            // e.printStackTrace();
            //DEBUG (don't output log messages per doc)
            //logger.error(e.getMessage());
        }
    } else if (m.scriptlang.equalsIgnoreCase("xpath")) {

        String xpath = m.script;

        try {
            createHtmlCleanerIfNeeded();

            int timesToRun = 1;
            Object[] currField = null;
            if ((null != m.flags) && m.flags.contains("c")) {
                currField = f.getMetadata().get(m.fieldName);
            }
            if (null != currField) { // chained metadata
                f.getMetadata().remove(m.fieldName); // (so will add to the end)
                timesToRun = currField.length;
                text = (String) currField[0];
            } //TESTED

            for (int ii = 0; ii < timesToRun; ++ii) {
                if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above)
                    text = (String) currField[ii];
                } //TESTED

                TagNode node = cleaner.clean(new ByteArrayInputStream(text.getBytes()));

                //NewCode : Only use html cleaner for cleansing
                //use JAXP for full Xpath lib
                Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);

                String extraRegex = extractRegexFromXpath(xpath);

                if (extraRegex != null)
                    xpath = xpath.replace(extraRegex, "");

                XPath xpa = XPathFactory.newInstance().newXPath();
                NodeList res = (NodeList) xpa.evaluate(xpath, doc, XPathConstants.NODESET);

                if (res.getLength() > 0) {
                    if ((null != m.flags) && (m.flags.contains("o"))) { // "o" for object
                        m.groupNum = -1; // (see bConvertToObject below)
                    }
                    StringBuffer prefix = new StringBuffer(m.fieldName).append(':');
                    int nFieldNameLen = m.fieldName.length() + 1;
                    ArrayList<Object> Llist = new ArrayList<Object>(res.getLength());
                    boolean bConvertToObject = ((m.groupNum != null) && (m.groupNum == -1));
                    boolean convertToXml = ((null != m.flags) && (m.flags.contains("x")));
                    for (int i = 0; i < res.getLength(); i++) {
                        Node info_node = res.item(i);
                        if ((null != m.flags) && (m.flags.contains("g"))) {
                            Llist.add(parseHtmlTable(info_node, m.replace));
                        } else if (bConvertToObject || convertToXml) {
                            // Try to create a JSON object out of this
                            StringWriter writer = new StringWriter();
                            try {
                                Transformer transformer = TransformerFactory.newInstance().newTransformer();
                                transformer.transform(new DOMSource(info_node), new StreamResult(writer));
                            } catch (TransformerException e1) {
                                continue;
                            }

                            if (bConvertToObject) {
                                try {
                                    JSONObject subObj = XML.toJSONObject(writer.toString());
                                    if (xpath.endsWith("*")) { // (can have any number of different names here)
                                        Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj));
                                    } //TESTED
                                    else {
                                        String[] rootNames = JSONObject.getNames(subObj);
                                        if (1 == rootNames.length) {
                                            // (don't think it can't be any other number in fact)
                                            subObj = subObj.getJSONObject(rootNames[0]);
                                        }
                                        boolean bUnescapeHtml = ((null != m.flags) && m.flags.contains("H"));
                                        Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj,
                                                bUnescapeHtml));
                                    } //TESTED
                                } catch (JSONException e) { // Just carry on
                                    continue;
                                }
                                //TESTED
                            } else { // leave in XML form
                                Llist.add(writer.toString().substring(38)); // +38: (step over <?xml version="1.0" encoding="UTF-8"?>)
                            } //TESTED (xpath_test.json)
                        } else { // Treat this as string, either directly or via regex
                            String info = info_node.getTextContent().trim();
                            if (extraRegex == null || extraRegex.isEmpty()) {
                                prefix.setLength(nFieldNameLen);
                                prefix.append(info);
                                String dupCheck = prefix.toString();

                                if (!regexDuplicates.contains(dupCheck)) {
                                    if ((null != m.flags) && m.flags.contains("H")) {
                                        info = StringEscapeUtils.unescapeHtml(info);
                                    }
                                    Llist.add(info);
                                    if (!bAllowDuplicates) {
                                        regexDuplicates.add(dupCheck);
                                    }
                                }
                            } else { // Apply regex to the string
                                Pattern dataRegex = createRegex(extraRegex, m.flags);
                                Matcher dataMatcher = dataRegex.matcher(info);
                                boolean result = dataMatcher.find();
                                while (result) {
                                    String toAdd;
                                    if (m.groupNum != null)
                                        toAdd = dataMatcher.group(m.groupNum);
                                    else
                                        toAdd = dataMatcher.group();
                                    prefix.setLength(nFieldNameLen);
                                    prefix.append(toAdd);
                                    String dupCheck = prefix.toString();

                                    if (!regexDuplicates.contains(dupCheck)) {
                                        if ((null != m.flags) && m.flags.contains("H")) {
                                            toAdd = StringEscapeUtils.unescapeHtml(toAdd);
                                        }
                                        Llist.add(toAdd);
                                        if (!bAllowDuplicates) {
                                            regexDuplicates.add(dupCheck);
                                        }
                                    }

                                    result = dataMatcher.find();
                                }
                            } //(regex vs no regex)
                        } //(end string vs object)
                    }
                    if (Llist.size() > 0) {
                        f.addToMetadata(m.fieldName, Llist.toArray());
                    }
                }
            } //(end loop over metadata objects if applicable)

        } catch (IOException ioe) {
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(ioe).toString(),
                    true);

            // Just do nothing and log
            //DEBUG (don't output log messages per doc)
            //logger.error(ioe.getMessage());
        } catch (ParserConfigurationException e1) {
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(),
                    true);
            // Just do nothing and log
            //DEBUG (don't output log messages per doc)
            //logger.error(e1.getMessage());
        } catch (XPathExpressionException e1) {
            _context.getHarvestStatus().logMessage("Error evaluating xpath expression: " + xpath, true);
        }
    } else if (m.scriptlang.equalsIgnoreCase("stream")) { // XML or JSON streaming interface
        // which one?
        try {
            boolean json = false;
            boolean xml = false;
            for (int i = 0; i < 128; ++i) {
                if ('<' == text.charAt(i)) {
                    xml = true;
                    break;
                }
                if ('{' == text.charAt(i) || '[' == text.charAt(i)) {
                    json = true;
                    break;
                }
                if (!Character.isSpaceChar(text.charAt(i))) {
                    break;
                }
            } //TESTED (too many spaces: meta_stream_test, test4; incorrect chars: test3, xml: test1, json: test2)

            boolean textNotObject = m.flags == null || !m.flags.contains("o");

            List<DocumentPojo> docs = new LinkedList<DocumentPojo>();
            List<String> levelOneFields = null;
            if (null != m.script) {
                levelOneFields = Arrays.asList(m.script.split("\\s*,\\s*"));
                if ((1 == levelOneFields.size()) && levelOneFields.get(0).isEmpty()) {
                    // convert [""] to null
                    levelOneFields = null;
                }
            } //TESTED (json and xml)

            if (xml) {
                XmlToMetadataParser parser = new XmlToMetadataParser(levelOneFields, null, null, null, null,
                        null, Integer.MAX_VALUE);
                XMLInputFactory factory = XMLInputFactory.newInstance();
                factory.setProperty(XMLInputFactory.IS_COALESCING, true);
                factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
                XMLStreamReader reader = null;
                try {
                    reader = factory.createXMLStreamReader(new ByteArrayInputStream(text.getBytes()));
                    docs = parser.parseDocument(reader, textNotObject);
                } finally {
                    if (null != reader)
                        reader.close();
                }
            } //TESTED (meta_stream_test, test1)
            if (json) {
                JsonReader jsonReader = null;
                try {
                    JsonToMetadataParser parser = new JsonToMetadataParser(null, levelOneFields, null, null,
                            Integer.MAX_VALUE);
                    jsonReader = new JsonReader(
                            new InputStreamReader(new ByteArrayInputStream(text.getBytes()), "UTF-8"));
                    jsonReader.setLenient(true);
                    docs = parser.parseDocument(jsonReader, textNotObject);
                } finally {
                    if (null != jsonReader)
                        jsonReader.close();
                }
            } //TESTED (meta_stream_test test2)

            if (!docs.isEmpty()) {
                ArrayList<String> Llist = null;
                ArrayList<Object> LlistObj = null;
                if (textNotObject) {
                    Llist = new ArrayList<String>(docs.size());
                } else {
                    LlistObj = new ArrayList<Object>(docs.size());
                }
                for (DocumentPojo doc : docs) {
                    if ((null != doc.getFullText()) || (null != doc.getMetadata())) {
                        if (textNotObject) {
                            Llist.add(doc.getFullText());
                        } //TESTED
                        else if (xml) {
                            LlistObj.add(doc.getMetadata());
                        } //TESTED
                        else if (json) {
                            Object o = doc.getMetadata();
                            if (null != o) {
                                o = doc.getMetadata().get("json");
                                if (o instanceof Object[]) {
                                    LlistObj.addAll(Arrays.asList((Object[]) o));
                                } else if (null != o) {
                                    LlistObj.add(o);
                                } //TESTED
                            }
                        } //TESTED
                    }
                } //TESTED
                if ((null != Llist) && !Llist.isEmpty()) {
                    f.addToMetadata(m.fieldName, Llist.toArray());
                } //TESTED
                if ((null != LlistObj) && !LlistObj.isEmpty()) {
                    f.addToMetadata(m.fieldName, LlistObj.toArray());
                } //TESTED

            } //TESTED (meta_stream_test test1,test2)
        } //(end try)
        catch (Exception e) { // various parsing errors
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(),
                    true);
        }
    } //TESTED (meta_stream_test)

    // (don't currently support other script types)
}