List of usage examples for org.apache.commons.lang StringUtils countMatches
public static int countMatches(String str, String sub)
Counts how many times the substring appears in the larger String.
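Before the project examples, a minimal standalone sketch of the documented behavior (the demo class name is illustrative; the expected outputs follow the commons-lang Javadoc, which defines a null or empty str or sub as yielding 0):
public class CountMatchesDemo {
    public static void main(String[] args) {
        // Counts non-overlapping occurrences of sub in str
        System.out.println(org.apache.commons.lang.StringUtils.countMatches("abba", "a"));   // 2
        System.out.println(org.apache.commons.lang.StringUtils.countMatches("abba", "ab"));  // 1
        System.out.println(org.apache.commons.lang.StringUtils.countMatches("abba", "xxx")); // 0

        // Null-safe: null or empty arguments return 0 instead of throwing
        System.out.println(org.apache.commons.lang.StringUtils.countMatches(null, "a"));     // 0
        System.out.println(org.apache.commons.lang.StringUtils.countMatches("abba", ""));    // 0

        // Common idiom in the examples below: derive path depth by counting separators
        System.out.println(org.apache.commons.lang.StringUtils.countMatches("org/apache/commons", "/")); // 2
    }
}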
From source file:no.imr.stox.functions.utils.CovariateUtils.java
public static String getFullPeriod(String period, Integer year) {
    Integer n = StringUtils.countMatches(period, "/");
    switch (n) {
    case 1:
        if (!(year % 400 == 0 || year % 4 == 0 && year % 100 != 0)) {
            // Reduce february max from 29 to 28 on max date in non-leap years
            Integer ns = period.indexOf("/");
            Integer day = Conversion.safeSubstringToIntegerNULL(period, 0, ns);
            Integer month = Conversion.safeSubstringToIntegerNULL(period, ns + 1, period.length());
            if (month == 2 && day == 29) {
                return IMRdate.encodeDateStr(day - 1, month, year);
            }
        }
        return period + "/" + year;
    case 2:
        return period;
    }
    return null;
}
From source file:nz.ac.waikato.its.irr.scripts.FixSquishedMetadata.java
private static boolean process(Item item, String schema, String element, String qualifier, String delimiter,
        int minMatches, boolean dryRun) throws SQLException, AuthorizeException {
    boolean changes = false;
    List<Metadatum> newMetadata = new ArrayList<>();
    Metadatum[] allMd = item.getMetadata(schema, element, qualifier, Item.ANY);
    for (Metadatum md : allMd) {
        if (StringUtils.isNotBlank(md.value) && StringUtils.countMatches(md.value, delimiter) >= minMatches) {
            String[] individualValues = StringUtils.splitByWholeSeparator(md.value, delimiter);
            for (int i = 0; i < individualValues.length; i++) {
                // normalise whitespace: line breaks / tabs to spaces, collapse double spaces
                individualValues[i] = individualValues[i].replaceAll("(\\r|\\n|\\t)", " ")
                        .replaceAll("  ", " ").trim();
            }
            System.out.println("item id=" + item.getID() + ": split |" + md.value + "| into |"
                    + StringUtils.join(individualValues, '|') + "|");
            if (!dryRun) {
                for (String individualValue : individualValues) {
                    if (StringUtils.isNotBlank(individualValue)) {
                        Metadatum newMd = new Metadatum();
                        newMd.language = md.language;
                        newMd.value = individualValue;
                        newMetadata.add(newMd);
                    }
                }
                changes = true;
            }
        } else {
            newMetadata.add(md);
        }
    }
    if (!dryRun && changes) {
        item.clearMetadata(schema, element, qualifier, Item.ANY);
        for (Metadatum newMd : newMetadata) {
            item.addMetadata(schema, element, qualifier, newMd.language, newMd.value, newMd.authority,
                    newMd.confidence);
        }
        item.updateMetadata();
    }
    return changes;
}
From source file:opennlp.tools.chatbot.SnippetToParagraphAndSectionHeaderContent.java
public HitBase formTextFromOriginalPageGivenSnippet(HitBase item) {
    Pair<String[], Map<String, String>> sentsMap = extractSentencesAndSectionMapFromPage(item.getUrl());
    if (sentsMap == null)
        return item;
    String[] sents = sentsMap.getFirst();
    item.setSectionHeaderContent(sentsMap.getSecond());
    // String title = item.getTitle().replace("<b>", " ").replace("</b>", " ")
    //         .replace("  ", " ").replace("  ", " ");
    // generation results for this sentence
    List<String> result = new ArrayList<String>();
    // form plain text from snippet
    String snapshot = item.getAbstractText().replace("<b>", " ").replace("</b>", " ")
            .replace("  ", " ").replace("  ", " ").replace("\"", "");
    String snapshotMarked = snapshot.replace(" ...", ".");
    List<String> fragments = TextProcessor.splitToSentences(snapshotMarked);
    if (fragments.size() < 3 && StringUtils.countMatches(snapshotMarked, ".") > 1) {
        // too few sentences found, force a split on periods
        snapshotMarked = snapshotMarked.replace("..", "&").replace(".", "&");
        String[] fragmSents = snapshotMarked.split("&");
        fragments = Arrays.asList(fragmSents);
    }
    for (String f : fragments) {
        String followSent = null;
        if (f.length() < 50)
            continue;
        String pageSentence = "";
        // try to find original sentence from webpage
        try {
            String[] mainAndFollowSent = getFullOriginalSentenceFromWebpageBySnippetFragment(f, sents);
            pageSentence = mainAndFollowSent[0];
            followSent = mainAndFollowSent[1];
            if (pageSentence != null && followSent != null)
                result.add(pageSentence + "\n" + followSent);
            else if (pageSentence != null) {
                result.add(pageSentence);
            } else {
                result.add(f);
                LOG.info("Could not find the original sentence \n" + f + "\n in the page ");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    item.setOriginalSentences(result);
    return item;
}
From source file:opennlp.tools.parse_thicket.apps.SnippetToParagraph.java
public HitBase formTextFromOriginalPageGivenSnippet(HitBase item) {
    String[] sents = extractSentencesFromPage(item.getUrl());
    String title = item.getTitle().replace("<b>", " ").replace("</b>", " ")
            .replace("  ", " ").replace("  ", " ");
    // generation results for this sentence
    List<String> result = new ArrayList<String>();
    // form plain text from snippet
    String snapshot = item.getAbstractText().replace("<b>", " ").replace("</b>", " ")
            .replace("  ", " ").replace("  ", " ").replace("\"", "");
    String snapshotMarked = snapshot.replace(" ...", ".");
    List<String> fragments = TextProcessor.splitToSentences(snapshotMarked);
    if (fragments.size() < 3 && StringUtils.countMatches(snapshotMarked, ".") > 1) {
        // too few sentences found, force a split on periods
        snapshotMarked = snapshotMarked.replace("..", "&").replace(".", "&");
        String[] fragmSents = snapshotMarked.split("&");
        fragments = Arrays.asList(fragmSents);
    }
    for (String f : fragments) {
        String followSent = null;
        if (f.length() < 50)
            continue;
        String pageSentence = "";
        // try to find original sentence from webpage
        try {
            String[] mainAndFollowSent = getFullOriginalSentenceFromWebpageBySnippetFragment(f, sents);
            pageSentence = mainAndFollowSent[0];
            followSent = mainAndFollowSent[1];
            if (pageSentence != null)
                result.add(pageSentence);
            else {
                result.add(f);
                LOG.info("Could not find the original sentence \n" + f + "\n in the page ");
            }
            //if (followSent != null)
            //    result.add(followSent);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    item.setOriginalSentences(result);
    return item;
}
From source file:opennlp.tools.parse_thicket.kernel_interface.SnippetToParagraphFull.java
public HitBase formTextFromOriginalPageGivenSnippet(HitBase item) {
    String[] sents = extractSentencesFromPage(item.getUrl());
    // String title = item.getTitle().replace("<b>", " ").replace("</b>", " ")
    //         .replace("  ", " ").replace("  ", " ");
    // generation results for this sentence
    List<String> result = new ArrayList<String>();
    // form plain text from snippet
    String snapshot = item.getAbstractText().replace("<b>", " ").replace("</b>", " ")
            .replace("  ", " ").replace("  ", " ").replace("\"", "");
    String snapshotMarked = snapshot.replace(" ...", ".");
    List<String> fragments = TextProcessor.splitToSentences(snapshotMarked);
    if (fragments.size() < 3 && StringUtils.countMatches(snapshotMarked, ".") > 1) {
        // too few sentences found, force a split on periods
        snapshotMarked = snapshotMarked.replace("..", "&").replace(".", "&");
        String[] fragmSents = snapshotMarked.split("&");
        fragments = Arrays.asList(fragmSents);
    }
    for (String f : fragments) {
        String followSent = null;
        if (f.length() < 50)
            continue;
        String pageSentence = "";
        // try to find original sentence from webpage
        try {
            String[] mainAndFollowSent = getFullOriginalSentenceFromWebpageBySnippetFragment(f, sents);
            pageSentence = mainAndFollowSent[0];
            followSent = mainAndFollowSent[1];
            if (pageSentence != null && followSent != null)
                result.add(pageSentence + "\n" + followSent);
            else if (pageSentence != null) {
                result.add(pageSentence);
            } else {
                result.add(f);
                LOG.info("Could not find the original sentence \n" + f + "\n in the page ");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    item.setOriginalSentences(result);
    return item;
}
From source file:opennlp.tools.textsimilarity.TextProcessor.java
private static HashSet<String> extractCommonSegments(Pair<List<String>, Map<String, HashSet<Integer>>> objA,
        Pair<List<String>, Map<String, HashSet<Integer>>> objB, Integer segSize) {
    HashSet<String> commonSegments = new HashSet<String>();
    List<String> tokensA = objA.getFirst();
    Map<String, HashSet<Integer>> tokenPosB = objB.getSecond();
    HashSet<Integer> lastPositions = null;
    int segLength = 1;
    StringBuffer segmentStr = new StringBuffer();
    for (int i = 0; i < tokensA.size(); i++) {
        String token = tokensA.get(i);
        HashSet<Integer> positions = null;
        // if ((positions = tokenPosB.get(token)) != null && !token.equals("<punc>")
        //         && !StopList.getInstance().isStopWord(token) && token.length() > 1) {
        if ((positions = tokenPosB.get(token)) != null) {
            // we have a list of positions
            if (lastPositions != null) {
                // see if there is overlap in positions
                if (hasNextPosition(lastPositions, positions)) {
                    segLength++;
                    commonSegments.remove(segmentStr.toString().trim());
                    segmentStr.append(" ");
                    segmentStr.append(token);
                    if (StringUtils.countMatches(segmentStr.toString(), " ") >= segSize) {
                        commonSegments.add(segmentStr.toString().trim());
                    }
                    lastPositions = positions;
                } else {
                    // did not find segment, reset
                    segLength = 1;
                    segmentStr.setLength(0);
                    lastPositions = null;
                }
            } else {
                lastPositions = positions;
                segmentStr.append(" ");
                segmentStr.append(token);
            }
        } else {
            // did not find segment, reset
            segLength = 1;
            segmentStr.setLength(0);
            lastPositions = null;
        }
    }
    return commonSegments;
}
From source file:org.alfresco.module.vti.handler.alfresco.AlfrescoMeetingServiceHandler.java
/**
 * @see org.alfresco.module.vti.handler.MeetingServiceHandler#createWorkspace(String, String, int, TimeZoneInformation, SessionUser)
 */
public String createWorkspace(String title, String templateName, int lcid,
        TimeZoneInformation timeZoneInformation, SessionUser user) throws Exception {
    // Build the site name from the title
    String siteName = removeIllegalCharacters(title);

    // A name consisting only of underscores is not a valid site name
    int matches = StringUtils.countMatches(siteName, "_");
    if (matches > 0 && siteName.length() == matches) {
        siteName = DEFAULT_SITE_NAME;
    }

    // Build up a unique name
    SiteInfo siteInfo = null;
    String newSiteName = null;
    int i = 0;
    do {
        newSiteName = truncateSiteName(siteName, i == 0 ? "" : "_" + i);
        siteInfo = siteService.getSite(newSiteName);
        i++;
    } while (siteInfo != null);

    // Have it created
    shareUtils.createSite(user, MEETING_WORKSPACE_NAME, newSiteName, title, "", true);
    return newSiteName;
}
From source file:org.apache.activemq.web.AjaxTest.java
public void assertResponseCount(int expected, String actual) {
    int occurrences = StringUtils.countMatches(actual, "<response");
    assertEquals("Expected number of <response> elements is not correct.", expected, occurrences);
}
From source file:org.apache.ambari.server.serveraction.upgrades.OozieConfigCalculationTest.java
/**
 * Checks that -Dhdp.version is added to the $HADOOP_OPTS variable in the oozie-env
 * content. Also checks that it is not added multiple times during upgrades.
 * @throws Exception
 */
@Test
public void testOozieEnvWithMissingParam() throws Exception {
    // Test case when old content does not contain the $HADOOP_OPTS variable at all
    String oldContent = "#!/bin/bash\n" + "\n"
            + "if [ -d \"/usr/lib/bigtop-tomcat\" ]; then\n"
            + "  export OOZIE_CONFIG=${OOZIE_CONFIG:-/etc/oozie/conf}\n"
            + "  export CATALINA_BASE=${CATALINA_BASE:-{{oozie_server_dir}}}\n"
            + "  export CATALINA_TMPDIR=${CATALINA_TMPDIR:-/var/tmp/oozie}\n"
            + "  export OOZIE_CATALINA_HOME=/usr/lib/bigtop-tomcat\n"
            + "fi\n" + "\n"
            + "# export OOZIE_BASE_URL=\"http://${OOZIE_HTTP_HOSTNAME}:${OOZIE_HTTP_PORT}/oozie\"\n"
            + "export JAVA_LIBRARY_PATH={{hadoop_lib_home}}/native/Linux-amd64-64";
    String newContent = OozieConfigCalculation.processPropertyValue(oldContent);
    assertTrue(newContent.endsWith("export HADOOP_OPTS=\"-Dhdp.version=$HDP_VERSION $HADOOP_OPTS\" "));

    // Test case when old content already contains the proper $HADOOP_OPTS variable:
    // processing must be idempotent
    oldContent = newContent;
    newContent = OozieConfigCalculation.processPropertyValue(oldContent);
    assertEquals(newContent, oldContent);
    assertEquals(1, StringUtils.countMatches(newContent, "-Dhdp.version"));

    // Test case when old content contains the $HADOOP_OPTS variable with some value
    oldContent = "#!/bin/bash\n" + "\n"
            + "if [ -d \"/usr/lib/bigtop-tomcat\" ]; then\n"
            + "  export OOZIE_CONFIG=${OOZIE_CONFIG:-/etc/oozie/conf}\n"
            + "  export CATALINA_BASE=${CATALINA_BASE:-{{oozie_server_dir}}}\n"
            + "  export CATALINA_TMPDIR=${CATALINA_TMPDIR:-/var/tmp/oozie}\n"
            + "  export OOZIE_CATALINA_HOME=/usr/lib/bigtop-tomcat\n"
            + "  export HADOOP_OPTS=-Dsome.option1 -Dsome.option1 $HADOOP_OPTS\n"
            + "fi\n" + "\n"
            + "# export OOZIE_BASE_URL=\"http://${OOZIE_HTTP_HOSTNAME}:${OOZIE_HTTP_PORT}/oozie\"\n"
            + "export JAVA_LIBRARY_PATH={{hadoop_lib_home}}/native/Linux-amd64-64";
    newContent = OozieConfigCalculation.processPropertyValue(oldContent);
    assertEquals("#!/bin/bash\n" + "\n"
            + "if [ -d \"/usr/lib/bigtop-tomcat\" ]; then\n"
            + "  export OOZIE_CONFIG=${OOZIE_CONFIG:-/etc/oozie/conf}\n"
            + "  export CATALINA_BASE=${CATALINA_BASE:-{{oozie_server_dir}}}\n"
            + "  export CATALINA_TMPDIR=${CATALINA_TMPDIR:-/var/tmp/oozie}\n"
            + "  export OOZIE_CATALINA_HOME=/usr/lib/bigtop-tomcat\n"
            + "  export HADOOP_OPTS=-Dsome.option1 -Dsome.option1 $HADOOP_OPTS\n"
            + "fi\n" + "\n"
            + "# export OOZIE_BASE_URL=\"http://${OOZIE_HTTP_HOSTNAME}:${OOZIE_HTTP_PORT}/oozie\"\n"
            + "export JAVA_LIBRARY_PATH={{hadoop_lib_home}}/native/Linux-amd64-64\n"
            + "export HADOOP_OPTS=\"-Dhdp.version=$HDP_VERSION $HADOOP_OPTS\" ", newContent);
}
From source file:org.apache.archiva.rest.services.DefaultBrowseService.java
protected List<ArtifactContentEntry> readFileEntries(File file, String filterPath, String repoId)
        throws IOException {
    Map<String, ArtifactContentEntry> artifactContentEntryMap = new HashMap<>();
    int filterDepth = StringUtils.countMatches(filterPath, "/");
    /*if (filterDepth == 0) {
        filterDepth = 1;
    }*/
    JarFile jarFile = new JarFile(file);
    try {
        Enumeration<JarEntry> jarEntryEnumeration = jarFile.entries();
        while (jarEntryEnumeration.hasMoreElements()) {
            JarEntry currentEntry = jarEntryEnumeration.nextElement();
            String cleanedEntryName = StringUtils.endsWith(currentEntry.getName(), "/")
                    ? StringUtils.substringBeforeLast(currentEntry.getName(), "/")
                    : currentEntry.getName();
            String entryRootPath = getRootPath(cleanedEntryName);
            int depth = StringUtils.countMatches(cleanedEntryName, "/");
            if (StringUtils.isEmpty(filterPath)
                    && !artifactContentEntryMap.containsKey(entryRootPath)
                    && depth == filterDepth) {
                artifactContentEntryMap.put(entryRootPath,
                        new ArtifactContentEntry(entryRootPath, !currentEntry.isDirectory(), depth, repoId));
            } else {
                if (StringUtils.startsWith(cleanedEntryName, filterPath)
                        && (depth == filterDepth || (!currentEntry.isDirectory() && depth == filterDepth))) {
                    artifactContentEntryMap.put(cleanedEntryName,
                            new ArtifactContentEntry(cleanedEntryName, !currentEntry.isDirectory(), depth,
                                    repoId));
                }
            }
        }
        if (StringUtils.isNotEmpty(filterPath)) {
            Map<String, ArtifactContentEntry> filteredArtifactContentEntryMap = new HashMap<>();
            for (Map.Entry<String, ArtifactContentEntry> entry : artifactContentEntryMap.entrySet()) {
                filteredArtifactContentEntryMap.put(entry.getKey(), entry.getValue());
            }
            List<ArtifactContentEntry> sorted = getSmallerDepthEntries(filteredArtifactContentEntryMap);
            if (sorted == null) {
                return Collections.emptyList();
            }
            Collections.sort(sorted, ArtifactContentEntryComparator.INSTANCE);
            return sorted;
        }
    } finally {
        if (jarFile != null) {
            jarFile.close();
        }
    }
    List<ArtifactContentEntry> sorted = new ArrayList<>(artifactContentEntryMap.values());
    Collections.sort(sorted, ArtifactContentEntryComparator.INSTANCE);
    return sorted;
}