List of usage examples for org.apache.commons.lang StringUtils substringsBetween
public static String[] substringsBetween(String str, String open, String close)
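Searches a String for substrings delimited by a start and end tag, returning all matching substrings in an array. Returns null when the input String is null, when either tag is null or empty, or when no match is found; an empty input String yields an empty array. Callers should therefore null-check the result before iterating over it.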
From source file:adalid.commons.util.StrUtils.java
/**
 * Collects every piece of text that appears between a "{" and the following "}".
 *
 * @param string the text to scan
 * @return whatever StringUtils.substringsBetween yields for the "{" / "}" tag pair;
 *         the result is passed through unchanged
 */
public static String[] getParametros(String string) {
    final String[] parametros = StringUtils.substringsBetween(string, "{", "}");
    return parametros;
}
From source file:opennlp.tools.doc_classifier.DocClassifierTrainingSetMultilingualExtender.java
public List<String> extractEntriesFromSpecial_Export(String filename) { List<String> filteredEntries = new ArrayList<String>(); String content = null;//from w w w. ja v a 2 s. c o m try { content = FileUtils.readFileToString(new File(filename)); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } String[] entries = StringUtils.substringsBetween(content, "[[", "]]"); for (String e : entries) { if (e.startsWith("Kategorie") || e.startsWith("Category") || e.startsWith("d:") || e.startsWith("User") || e.startsWith("Portal")) continue; if (e.indexOf(':') > -1) continue; if (e.indexOf(":") > -1) continue; int endofEntry = e.indexOf('|'); if (endofEntry > -1) e = e.substring(0, endofEntry); filteredEntries.add(e); } filteredEntries = new ArrayList<String>(new HashSet<String>(filteredEntries)); return filteredEntries; }
From source file:opennlp.tools.parse_thicket.kernel_interface.style_classif.TSNE_ImporterProcessor.java
public void importFileCreatClassifDirs() { Map<Integer, String> id_Text = new HashMap<Integer, String>(); Map<Integer, String> id_Label = new HashMap<Integer, String>(); try {//from w w w .j ava 2s . c o m FileUtils.cleanDirectory(new File(resourceWorkDir + "/txt")); } catch (IOException e2) { e2.printStackTrace(); } String text = null; try { text = FileUtils.readFileToString(new File(resourceWorkDir + importFilePath), Charset.defaultCharset().toString()); } catch (IOException e) { e.printStackTrace(); } String[] portions = StringUtils.substringsBetween(text, "<text ", "/text>"); for (int i = 0; i < portions.length; i++) { String label = StringUtils.substringBetween(portions[i], "id=\"", "\">"); String po = StringUtils.substringBetween(portions[i], "\">", "<"); id_Text.put(i, po); id_Label.put(i, label); if (true) { String localDirName = label.substring(0, 4); if (!new File(resourceWorkDir + "txt/" + localDirName).exists()) try { FileUtils.forceMkdir(new File(resourceWorkDir + "txt/" + localDirName)); } catch (IOException e1) { e1.printStackTrace(); } try { label = label.replace('/', '_'); String fullPath = resourceWorkDir + "txt/" + localDirName + "/" + i + label + ".txt"; FileUtils.writeStringToFile(new File(fullPath), po); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:opennlp.tools.similarity.apps.GoogleAutoCompleteQueryRunner.java
public List<String> getAutoCompleteExpression(String rawExpr) { // insert spaces into camel cases rawExpr = rawExpr.replaceAll("([a-z][a-z])([A-Z][a-z])", "$1 $2"); String query = rawExpr.replace(' ', '+'); try {/* w w w. j a v a2 s.c om*/ query = URLEncoder.encode(query, "UTF-8"); } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block e.printStackTrace(); } String pageOrigHTML = pageFetcher.fetchOrigHTML(searchRequest + query + suffix); String[] results = StringUtils.substringsBetween(pageOrigHTML, "<CompleteSuggestion>", "</CompleteSuggestion>"); List<List<String>> accum = new ArrayList<List<String>>(); if (results == null) return null; for (String wrapped : results) { List<String> accumCase = new ArrayList<String>(); String[] words = null; try { words = StringUtils.substringBetween(wrapped, "\"").split(" "); } catch (Exception e) { } if (words == null || words.length < 1) continue; accumCase = Arrays.asList(words); accum.add(accumCase); } //TODO make more noise-resistant algo if (accum.size() > 1) { List<String> first = new ArrayList<String>(accum.get(0)); List<String> second = new ArrayList<String>(accum.get(1)); first.retainAll(second); if (first.size() > 0) return first; else return accum.get(0); } if (accum.size() == 1) return accum.get(0); return null; }
From source file:opennlp.tools.similarity.apps.StoryDiscourseNavigator.java
/**
 * Builds keyword phrases for an entity from a search-result page.
 *
 * Runs a search for the entity (20 results requested), picks the first hit whose URL
 * contains "wikipedia." (or the last hit when none does), fetches that page, and
 * extracts the fragments found between {@code "><a href="#} and {@code "><span}.
 * Fragments containing "cite_note" are skipped. In the rest, '_' and '.' are replaced
 * by spaces and the entity name is prepended.
 *
 * @param entity the entity to search for
 * @return one "entity fragment" phrase per extracted fragment; an empty array when the
 *         search yields no hits, the page cannot be fetched, or no fragment matches
 */
private String[] obtainKeywordsForAnEntityFromWikipedia(String entity) {
    // NOTE(review): credential is hard-coded in source; it should come from configuration.
    yrunner.setKey("xdnRVcVf9m4vDvW1SkTAz5kS5DFYa19CrPYGelGJxnc");
    List<HitBase> resultList = yrunner.runSearch(entity, 20);
    if (resultList == null || resultList.isEmpty()) {
        return new String[0];
    }

    // First hit whose URL mentions Wikipedia; falls back to the last hit if none does.
    HitBase h = null;
    for (int i = 0; i < resultList.size(); i++) {
        h = resultList.get(i);
        if (h.getUrl().indexOf("wikipedia.") > -1) {
            break;
        }
    }

    String content = pFetcher.fetchOrigHTML(h.getUrl());
    if (content == null) {
        return new String[0];
    }

    // Swap the opening delimiter for a marker so both ends can be located as plain tags.
    content = content.replace("\"><a href=\"#", "&_&_&_&");
    String[] portions = StringUtils.substringsBetween(content, "&_&_&_&", "\"><span");
    if (portions == null) {
        return new String[0];
    }

    List<String> results = new ArrayList<String>();
    for (String portion : portions) {
        if (portion.indexOf("cite_note") > -1) {
            continue;
        }
        results.add(entity + " " + portion.replace('_', ' ').replace('.', ' '));
    }
    return results.toArray(new String[0]);
}
From source file:opennlp.tools.similarity.apps.utils.Utils.java
public static String removeHTMLTagsFromStr(String inputStr) { String[] removeTags = StringUtils.substringsBetween(inputStr, "<", ">"); if (removeTags != null && removeTags.length > 0) { for (String tag : removeTags) { inputStr = StringUtils.remove(inputStr, "<" + tag + ">"); }// w w w. java 2 s . c om } return inputStr; }
From source file:org.apache.forrest.conf.AntProperties.java
public synchronized Object put(Object name, Object value) { //if the property is already there don't overwrite, as in Ant //properties defined first take precedence if (!super.containsKey(name)) { String[] names = StringUtils.substringsBetween(value.toString(), "${", "}"); if (names != null) { for (int i = 0; i < names.length; i++) { String currentName = names[i]; String valueToSearchFor = "${" + currentName + "}"; String valueToReplaceWith = (String) super.get(currentName); value = StringUtils.replace(value.toString(), valueToSearchFor, valueToReplaceWith); }/*from w w w. jav a 2 s .co m*/ } return super.put(name, value); } return null; }
From source file:org.apache.nutch.crawl.SeedGenerator.java
public static void main(String[] args) throws Exception { String urlFormat = "http://oumen.com/detail.php?atid={{{1000,4460}}}"; String[] urlParts = urlFormat.split("\\{\\{\\{\\d+\\,\\d+\\}\\}\\}"); String[] placeholders = StringUtils.substringsBetween(urlFormat, "{{{", "}}}"); ArrayList<ArrayList<Integer>> ranges = Lists.newArrayList(); for (int i = 0; i < placeholders.length; ++i) { int min = Integer.parseInt(StringUtils.substringBefore(placeholders[i], ",")); int max = Integer.parseInt(StringUtils.substringAfter(placeholders[i], ",")); ranges.add(Lists.newArrayList(min, max)); }//from w ww . jav a 2s . com // we can support only one placeholder right now StringBuilder content = new StringBuilder(); for (int i = ranges.get(0).get(0); i <= ranges.get(0).get(1); ++i) { String url = urlParts[0] + i; if (urlParts.length > 1) { url += urlParts[1]; } content.append(url); content.append("\n"); } String tidyDomain = NetUtil.getTopLevelDomain(urlFormat); String file = StringUtils.substringBefore(tidyDomain, ".").toLowerCase().replaceAll("[^a-z]", "_"); file = "/tmp/" + file + ".txt"; FileUtils.writeStringToFile(new File(file), content.toString(), "utf-8"); System.out.println("url seed results are saved in : " + file); }
From source file:org.apache.uima.alchemy.utils.Alchemy2TypeSystemMapper.java
public static void mapAnnotatedEntities(AnnotatedResults results, JCas aJCas) { setLanaguage(results, aJCas);/* w w w . j ava 2 s.c om*/ String annotatedText = results.getAnnotatedText(); // find strings of pattern 'TYPE[TEXT' String[] ants = StringUtils.substringsBetween(annotatedText, "[", "]"); // map the ants to UIMA CAS for (String ant : ants) { if (ant.indexOf("[") > 0) { AlchemyAnnotation alchemyAnnotation = new AlchemyAnnotation(aJCas); int indexOfAnt = annotatedText.indexOf(ant); alchemyAnnotation.setBegin(indexOfAnt - 1); String antText = ant.substring(ant.indexOf("[") + 1); alchemyAnnotation.setEnd(indexOfAnt + antText.length() - 1); String antType = ant.substring(0, ant.indexOf("[")); alchemyAnnotation.setAlchemyType(antType); alchemyAnnotation.addToIndexes(); annotatedText = annotatedText.replaceFirst("\\[" + ant.replace("[", "\\[") + "\\]\\]", antText); } } }
From source file:org.b3log.solo.processor.SkinRenderer.java
/**
 * Renders the given FreeMarker template, returning only the requested container for
 * PJAX requests.
 *
 * The data model always receives a "pjax" flag. Non-PJAX requests are delegated to the
 * superclass. For PJAX requests the template is processed in full; the sections
 * enclosed by the "pjax {container} start" / "end" comment markers, with the container
 * name taken from the "X-PJAX-Container" header, are then concatenated and returned.
 * When no such section exists the whole page is returned. In both PJAX cases a
 * "Generated by Latke" comment with the elapsed time and a timestamp is appended.
 *
 * @param request   the specified request
 * @param dataModel the specified data model
 * @param template  the specified FreeMarker template
 * @return generated HTML
 * @throws Exception exception
 */
@Override
protected String genHTML(final HttpServletRequest request, final Map<String, Object> dataModel,
        final Template template) throws Exception {
    final boolean pjaxRequest = isPJAX(request);
    dataModel.put("pjax", pjaxRequest);

    if (pjaxRequest) {
        final StringWriter rendered = new StringWriter();
        template.setOutputEncoding("UTF-8");
        template.process(dataModel, rendered);

        // Timing is taken after rendering so the footer reports the full duration.
        final long finishedAt = System.currentTimeMillis();
        final String finishedAtText = DateFormatUtils.format(finishedAt, "yyyy/MM/dd HH:mm:ss");
        final long startedAt = (Long) request.getAttribute(Keys.HttpRequest.START_TIME_MILLIS);
        final String latke = String.format(
                "\n<!-- Generated by Latke (https://github.com/b3log/latke) in %1$dms, %2$s -->",
                finishedAt - startedAt, finishedAtText);

        final String pjaxContainer = request.getHeader("X-PJAX-Container");
        final String startMarker = "<!---- pjax {" + pjaxContainer + "} start ---->";
        final String endMarker = "<!---- pjax {" + pjaxContainer + "} end ---->";

        final String html = rendered.toString();
        final String[] containers = StringUtils.substringsBetween(html, startMarker, endMarker);
        final String body = (null == containers) ? html : String.join("", containers);
        return body + latke;
    }

    return super.genHTML(request, dataModel, template);
}