Example usages of org.apache.commons.lang.StringUtils.substringsBetween

Introduction

On this page you can find example usages of the method org.apache.commons.lang.StringUtils.substringsBetween.

Prototype

public static String[] substringsBetween(String str, String open, String close)

Source Link

Document

Searches a String for substrings delimited by a start and end tag, returning all matching substrings in an array.

Usage

From source file:adalid.commons.util.StrUtils.java

/**
 * Collects every substring of the given text that sits between a "{" and the following "}".
 *
 * @param string the text to scan
 * @return the result of {@code StringUtils.substringsBetween} for the "{" / "}" delimiters,
 *         passed through unchanged (that call can yield {@code null}, e.g. when nothing matches)
 */
public static String[] getParametros(String string) {
    final String openTag = "{";
    final String closeTag = "}";
    String[] parametros = StringUtils.substringsBetween(string, openTag, closeTag);
    return parametros;
}

From source file:opennlp.tools.doc_classifier.DocClassifierTrainingSetMultilingualExtender.java

/**
 * Reads an export file and collects the distinct targets of its {@code [[...]]} links.
 *
 * <p>Entries starting with "Kategorie", "Category", "d:", "User" or "Portal", and entries
 * containing a ':' are skipped. For the remaining entries only the part before the first '|'
 * is kept.
 *
 * @param filename path of the export file to read
 * @return the distinct remaining entries, in no particular order (they pass through a
 *         {@code HashSet}); an empty list when the file cannot be read or has no
 *         {@code [[...]]} entries
 */
public List<String> extractEntriesFromSpecial_Export(String filename) {
    List<String> filteredEntries = new ArrayList<String>();
    String content = null;
    try {
        content = FileUtils.readFileToString(new File(filename));
    } catch (IOException e) {
        // Best effort: report the failure and fall through to the empty result below.
        e.printStackTrace();
    }

    String[] entries = StringUtils.substringsBetween(content, "[[", "]]");
    if (entries == null) {
        // substringsBetween yields null for null content or when nothing matched;
        // iterating over it would throw a NullPointerException.
        return filteredEntries;
    }

    for (String entry : entries) {
        if (entry.startsWith("Kategorie") || entry.startsWith("Category") || entry.startsWith("d:")
                || entry.startsWith("User") || entry.startsWith("Portal")) {
            continue;
        }
        // Skip every entry that carries a ':' anywhere.
        if (entry.indexOf(':') > -1) {
            continue;
        }
        // Keep only the part before the first '|'.
        int endOfEntry = entry.indexOf('|');
        String target = endOfEntry > -1 ? entry.substring(0, endOfEntry) : entry;
        filteredEntries.add(target);
    }

    // De-duplicate; note that this does not preserve the order of appearance.
    return new ArrayList<String>(new HashSet<String>(filteredEntries));
}

From source file:opennlp.tools.parse_thicket.kernel_interface.style_classif.TSNE_ImporterProcessor.java

/**
 * Splits the import file into its {@code <text ...>.../text>} portions and writes each portion's
 * text into its own file under a sub-directory named after the first (up to) four characters
 * of the portion's {@code id} attribute.
 *
 * <p>The target "txt" directory is cleaned first. I/O failures are printed and do not abort
 * the run. Portions without an {@code id="..."} attribute are skipped.
 */
public void importFileCreatClassifDirs() {
    // NOTE(review): this path is built as resourceWorkDir + "/txt" while the writes below use
    // resourceWorkDir + "txt/" - confirm whether resourceWorkDir ends with a separator.
    try {
        FileUtils.cleanDirectory(new File(resourceWorkDir + "/txt"));
    } catch (IOException e2) {
        e2.printStackTrace();
    }

    String text = null;
    try {
        text = FileUtils.readFileToString(new File(resourceWorkDir + importFilePath),
                Charset.defaultCharset().toString());
    } catch (IOException e) {
        e.printStackTrace();
    }

    String[] portions = StringUtils.substringsBetween(text, "<text ", "/text>");
    if (portions == null) {
        // Nothing was read or no portion matched; there is nothing to write.
        return;
    }

    for (int i = 0; i < portions.length; i++) {
        String label = StringUtils.substringBetween(portions[i], "id=\"", "\">");
        String po = StringUtils.substringBetween(portions[i], "\">", "<");
        if (label == null) {
            // Without an id there is no directory or file name to derive.
            continue;
        }

        // Labels shorter than four characters are used whole instead of failing in substring.
        String localDirName = label.substring(0, Math.min(4, label.length()));
        File localDir = new File(resourceWorkDir + "txt/" + localDirName);
        if (!localDir.exists()) {
            try {
                FileUtils.forceMkdir(localDir);
            } catch (IOException e1) {
                e1.printStackTrace();
            }
        }

        try {
            // '/' is replaced so the label cannot add path segments to the file name.
            label = label.replace('/', '_');
            String fullPath = resourceWorkDir + "txt/" + localDirName + "/" + i + label + ".txt";
            FileUtils.writeStringToFile(new File(fullPath), po);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:opennlp.tools.similarity.apps.GoogleAutoCompleteQueryRunner.java

/**
 * Queries the auto-complete service for the given expression and derives a list of words from
 * the returned suggestions.
 *
 * <p>Each {@code <CompleteSuggestion>} element contributes the space-separated words of its
 * first double-quoted value. With two or more suggestions the words common to the first two
 * are returned, or the first suggestion's words when they share none. With exactly one
 * suggestion its words are returned.
 *
 * @param rawExpr the expression to complete; camel-case boundaries are split with a space first
 * @return the derived words, or {@code null} when the response has no suggestions or none of
 *         them yields any words
 */
public List<String> getAutoCompleteExpression(String rawExpr) {
    // insert spaces into camel cases
    rawExpr = rawExpr.replaceAll("([a-z][a-z])([A-Z][a-z])", "$1 $2");
    // NOTE(review): the '+' inserted here is itself percent-encoded by URLEncoder below
    // (as %2B), whereas URLEncoder already turns ' ' into '+'. Confirm this is intended.
    String query = rawExpr.replace(' ', '+');
    try {
        query = URLEncoder.encode(query, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // The un-encoded query is used as a fallback.
        e.printStackTrace();
    }

    String pageOrigHTML = pageFetcher.fetchOrigHTML(searchRequest + query + suffix);
    String[] results = StringUtils.substringsBetween(pageOrigHTML, "<CompleteSuggestion>",
            "</CompleteSuggestion>");
    if (results == null) {
        return null;
    }

    List<List<String>> accum = new ArrayList<List<String>>();
    for (String wrapped : results) {
        // A suggestion without a quoted value is skipped explicitly instead of relying on a
        // swallowed NullPointerException.
        String quoted = StringUtils.substringBetween(wrapped, "\"");
        if (quoted == null) {
            continue;
        }
        String[] words = quoted.split(" ");
        if (words.length < 1) {
            continue;
        }
        accum.add(Arrays.asList(words));
    }

    //TODO make more noise-resistant algo
    if (accum.size() > 1) {
        List<String> first = new ArrayList<String>(accum.get(0));
        List<String> second = new ArrayList<String>(accum.get(1));

        // Keep only the words the first two suggestions have in common.
        first.retainAll(second);
        if (first.size() > 0) {
            return first;
        }
        return accum.get(0);
    }

    if (accum.size() == 1) {
        return accum.get(0);
    }

    return null;
}

From source file:opennlp.tools.similarity.apps.StoryDiscourseNavigator.java

/**
 * Searches for the entity, fetches the first hit whose URL contains "wikipedia." and turns
 * the page's in-page anchor names into keyword phrases of the form "entity anchor text".
 *
 * <p>When no hit URL contains "wikipedia." the last hit is used instead, as before.
 *
 * @param entity the entity to search for; it prefixes every returned phrase
 * @return the keyword phrases; an empty array when the search returns no hits, the page
 *         cannot be fetched, or no anchors are found
 */
private String[] obtainKeywordsForAnEntityFromWikipedia(String entity) {
    // NOTE(review): this search key is hard-coded in source; consider loading it from
    // configuration instead.
    yrunner.setKey("xdnRVcVf9m4vDvW1SkTAz5kS5DFYa19CrPYGelGJxnc");
    List<HitBase> resultList = yrunner.runSearch(entity, 20);
    if (resultList == null || resultList.isEmpty()) {
        // No hit to fetch; previously this dereferenced a null hit.
        return new String[0];
    }

    HitBase h = null;
    for (HitBase candidate : resultList) {
        h = candidate;
        if (h.getUrl().indexOf("wikipedia.") > -1) {
            break;
        }
    }

    String content = pFetcher.fetchOrigHTML(h.getUrl());
    if (content == null) {
        return new String[0];
    }

    // Replace the anchor prefix with a marker that is easy to use as an opening delimiter.
    content = content.replace("\"><a href=\"#", "&_&_&_&");
    String[] portions = StringUtils.substringsBetween(content, "&_&_&_&", "\"><span");
    if (portions == null) {
        return new String[0];
    }

    List<String> results = new ArrayList<String>();
    for (String portion : portions) {
        // Anchors containing "cite_note" are skipped.
        if (portion.indexOf("cite_note") > -1) {
            continue;
        }
        results.add(entity + " " + portion.replace('_', ' ').replace('.', ' '));
    }
    return results.toArray(new String[0]);
}

From source file:opennlp.tools.similarity.apps.utils.Utils.java

/**
 * Strips every {@code <...>} sequence found in the input.
 *
 * @param inputStr the text to clean
 * @return the text with each found {@code <...>} sequence removed; the input itself when
 *         no such sequence is found
 */
public static String removeHTMLTagsFromStr(String inputStr) {
    String[] tagBodies = StringUtils.substringsBetween(inputStr, "<", ">");
    if (tagBodies == null || tagBodies.length == 0) {
        return inputStr;
    }

    String stripped = inputStr;
    for (int i = 0; i < tagBodies.length; i++) {
        // Re-wrap the body in its delimiters so the whole tag is removed.
        String fullTag = "<" + tagBodies[i] + ">";
        stripped = StringUtils.remove(stripped, fullTag);
    }
    return stripped;
}

From source file:org.apache.forrest.conf.AntProperties.java

/**
 * Stores a property unless the key is already present, first replacing every
 * <code>${other}</code> reference in the value with the value stored under {@code other}.
 *
 * @param name  the property key
 * @param value the property value, possibly containing <code>${...}</code> references
 * @return {@code null} when the key already exists (the existing entry is kept, as in Ant,
 *         where properties defined first take precedence); otherwise the result of the
 *         superclass {@code put}
 */
public synchronized Object put(Object name, Object value) {
    // First definition wins: leave an existing entry untouched.
    if (super.containsKey(name)) {
        return null;
    }

    String[] referenced = StringUtils.substringsBetween(value.toString(), "${", "}");
    if (referenced != null) {
        for (String referencedName : referenced) {
            String placeholder = "${" + referencedName + "}";
            String resolved = (String) super.get(referencedName);
            value = StringUtils.replace(value.toString(), placeholder, resolved);
        }
    }
    return super.put(name, value);
}

From source file:org.apache.nutch.crawl.SeedGenerator.java

/**
 * Expands the hard-coded URL format, whose <code>{{{min,max}}}</code> placeholder denotes an
 * inclusive integer range, into one URL per value and writes them, one per line, to a file
 * under /tmp named after the URL's domain.
 *
 * @param args unused
 * @throws Exception if parsing the range or writing the file fails
 */
public static void main(String[] args) throws Exception {
    String urlFormat = "http://oumen.com/detail.php?atid={{{1000,4460}}}";
    String[] placeholders = StringUtils.substringsBetween(urlFormat, "{{{", "}}}");
    String[] urlParts = urlFormat.split("\\{\\{\\{\\d+\\,\\d+\\}\\}\\}");

    ArrayList<ArrayList<Integer>> ranges = Lists.newArrayList();
    for (String placeholder : placeholders) {
        int lower = Integer.parseInt(StringUtils.substringBefore(placeholder, ","));
        int upper = Integer.parseInt(StringUtils.substringAfter(placeholder, ","));
        ranges.add(Lists.newArrayList(lower, upper));
    }

    // we can support only one placeholder right now
    ArrayList<Integer> onlyRange = ranges.get(0);
    String head = urlParts[0];
    String tail = urlParts.length > 1 ? urlParts[1] : "";

    StringBuilder content = new StringBuilder();
    for (int id = onlyRange.get(0); id <= onlyRange.get(1); ++id) {
        content.append(head).append(id).append(tail).append("\n");
    }

    String tidyDomain = NetUtil.getTopLevelDomain(urlFormat);
    String baseName = StringUtils.substringBefore(tidyDomain, ".").toLowerCase().replaceAll("[^a-z]", "_");
    String file = "/tmp/" + baseName + ".txt";
    FileUtils.writeStringToFile(new File(file), content.toString(), "utf-8");

    System.out.println("url seed results are saved in : " + file);
}

From source file:org.apache.uima.alchemy.utils.Alchemy2TypeSystemMapper.java

/**
 * Creates an {@code AlchemyAnnotation} in the CAS for every {@code [TYPE[TEXT]]} span found
 * in the annotated text of the results.
 *
 * <p>Each processed span is replaced by its plain TEXT in a working copy of the annotated
 * text, and the begin/end offsets are computed against that working copy.
 *
 * @param results the results whose annotated text is scanned
 * @param aJCas   the CAS the annotations are added to
 */
public static void mapAnnotatedEntities(AnnotatedResults results, JCas aJCas) {
    setLanaguage(results, aJCas);
    String annotatedText = results.getAnnotatedText();

    // find strings of pattern 'TYPE[TEXT'
    String[] ants = StringUtils.substringsBetween(annotatedText, "[", "]");
    if (ants == null) {
        // No annotated text or no bracketed span: nothing to map.
        return;
    }

    // map the ants to UIMA CAS
    for (String ant : ants) {
        int typeEnd = ant.indexOf("[");
        if (typeEnd > 0) {
            AlchemyAnnotation alchemyAnnotation = new AlchemyAnnotation(aJCas);

            // The span starts one character before 'TYPE[TEXT', at its leading '['.
            int indexOfAnt = annotatedText.indexOf(ant);
            alchemyAnnotation.setBegin(indexOfAnt - 1);

            String antText = ant.substring(typeEnd + 1);
            alchemyAnnotation.setEnd(indexOfAnt + antText.length() - 1);

            String antType = ant.substring(0, typeEnd);
            alchemyAnnotation.setAlchemyType(antType);
            alchemyAnnotation.addToIndexes();

            // Replace the literal '[TYPE[TEXT]]' with TEXT. Both sides are quoted so that
            // regex metacharacters in the span and '$' or '\' in the text are taken literally.
            annotatedText = annotatedText.replaceFirst(
                    java.util.regex.Pattern.quote("[" + ant + "]]"),
                    java.util.regex.Matcher.quoteReplacement(antText));
        }
    }

}

From source file:org.b3log.solo.processor.SkinRenderer.java

/**
 * Renders the given FreeMarker template for the request, with special handling for PJAX.
 *
 * <p>Non-PJAX requests are delegated to the superclass. For PJAX requests the template is
 * rendered in full and only the fragments enclosed by the start/end marker comments of the
 * container named in the "X-PJAX-Container" header are returned. If no such fragment exists
 * the whole page is returned. A "Generated by Latke" comment with the elapsed time and a
 * timestamp is appended in both PJAX cases.
 *
 * @param request   the specified request
 * @param dataModel the specified data model; its "pjax" entry is set by this method
 * @param template  the specified FreeMarker template
 * @return generated HTML
 * @throws Exception exception
 */
@Override
protected String genHTML(final HttpServletRequest request, final Map<String, Object> dataModel,
        final Template template) throws Exception {
    final boolean pjaxRequest = isPJAX(request);
    dataModel.put("pjax", pjaxRequest);
    if (!pjaxRequest) {
        return super.genHTML(request, dataModel, template);
    }

    final StringWriter rendered = new StringWriter();
    template.setOutputEncoding("UTF-8");
    template.process(dataModel, rendered);

    // Build the trailing comment from the request's start time and the time rendering ended.
    final long finishedAt = System.currentTimeMillis();
    final String finishedAtText = DateFormatUtils.format(finishedAt, "yyyy/MM/dd HH:mm:ss");
    final long startedAt = (Long) request.getAttribute(Keys.HttpRequest.START_TIME_MILLIS);
    final long elapsedMillis = finishedAt - startedAt;
    final String latke = String.format(
            "\n<!-- Generated by Latke (https://github.com/b3log/latke) in %1$dms, %2$s -->",
            elapsedMillis, finishedAtText);

    final String pjaxContainer = request.getHeader("X-PJAX-Container");
    final String startMarker = "<!---- pjax {" + pjaxContainer + "} start ---->";
    final String endMarker = "<!---- pjax {" + pjaxContainer + "} end ---->";

    final String html = rendered.toString();
    final String[] fragments = StringUtils.substringsBetween(html, startMarker, endMarker);
    final String body = (null == fragments) ? html : String.join("", fragments);
    return body + latke;
}