List of usage examples for the java.util.regex.Matcher.start() method
public int start()
From source file:com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java
/** * processHeader/*from www .java2 s .co m*/ * * @param headerPattern * @param f * @param meta */ @SuppressWarnings("deprecation") private void processHeader(Pattern headerPattern, DocumentPojo f, List<metaField> meta, SourcePojo source, UnstructuredAnalysisConfigPojo uap) { if (headerPattern != null) { Matcher headerMatcher = headerPattern.matcher(f.getFullText()); String headerText = null; while (headerMatcher.find()) { if (headerMatcher.start() == 0) { headerText = headerMatcher.group(0); f.setHeaderEndIndex(headerText.length()); for (int i = 1; i < headerMatcher.groupCount() + 1; i++) { f.addToHeader(headerMatcher.group(i).trim()); } break; } } if (null != headerText && null != meta) { for (metaField m : meta) { if (m.context == Context.Header || m.context == Context.All) { this.processMeta(f, m, headerText, source, uap); } } } } }
From source file:com.dwdesign.tweetings.activity.ComposeActivity.java
private final void gatherLinks(ArrayList<Hyperlink> links, Spannable s, Pattern pattern) { // Matcher matching the pattern Matcher m = pattern.matcher(s); while (m.find()) { int start = m.start(); int end = m.end(); /*/*from ww w . j a v a 2s .c o m*/ * Hyperlink is basically used like a structure for storing the information about * where the link was found. */ Hyperlink spec = new Hyperlink(); spec.textSpan = s.subSequence(start, end); spec.span = new InternalURLSpan(spec.textSpan.toString()); spec.start = start; spec.end = end; links.add(spec); } }
From source file:net.yacy.cora.document.id.MultiProtocolURL.java
/** * Resolve '..' segments in the path.// w ww . jav a2s . c o m * For standard pseudo algorithms, see : * <ul> * <li>https://tools.ietf.org/html/rfc3986#section-5.2.4</li> * <li>https://url.spec.whatwg.org/#path-state</li> * <li>https://www.w3.org/TR/url/#relative-path-state</li> * </ul> * @param path URL path part : must not be null * @return the path with '..' segments resolved */ private static final String resolveBackpath(final String path) { String p = path; if (p.isEmpty() || p.charAt(0) != '/') { p = "/" + p; } final Matcher qm = CommonPattern.QUESTION.matcher(p); // do not resolve backpaths in the post values final int end = qm.find() ? qm.start() : p.length(); final Matcher matcher = backPathPattern.matcher(p); while (matcher.find()) { if (matcher.start() > end) break; p = matcher.replaceAll(""); matcher.reset(p); } /* Let's remove any eventual remaining but inappropriate '..' segments at the beginning. * See https://tools.ietf.org/html/rfc3986#section-5.2.4 -> parts 2.C and 2.D */ while (p.startsWith("/../")) { p = p.substring(3); } if (p.equals("/..")) { p = "/"; } return p.equals("") ? "/" : p; }
From source file:net.sf.jabref.wizard.auximport.AuxSubGenerator.java
/** * parseAuxFile read the Aux file and fill up some intern data structures. Nested aux files (latex \\include) * supported!/* www .j a va 2 s . c om*/ * * @param filename String : Path to LatexAuxFile * @return boolean, true = no error occurs */ // found at comp.text.tex // > Can anyone tell be the information held within a .aux file? Is there a // > specific format to this file? // // I don't think there is a particular format. Every package, class // or document can write to the aux file. The aux file consists of LaTeX macros // and is read at the \begin{document} and again at the \end{document}. // // It usually contains information about existing labels // \\newlabel{sec:Intro}{{1}{1}} // and citations // \citation{hiri:conv:1993} // and macros to write information to other files (like toc, lof or lot files) // \@writefile{toc}{\contentsline {section}{\numberline // {1}Intro}{1}} // but as I said, there can be a lot more // aux file : // // \\citation{x} x = used reference of bibtex library entry // // \\@input{x} x = nested aux file // // the \\bibdata{x} directive contains information about the // bibtex library file -> x = name of bib file // // \\bibcite{x}{y} // x is a label for an item and y is the index in bibliography private boolean parseAuxFile(String filename) { // regular expressions Matcher matcher; // while condition boolean cont; // return value -> default: no error boolean back = true; // file list, used for nested aux files List<String> fileList = new ArrayList<>(5); fileList.add(filename); // get the file path File dummy = new File(filename); String path = dummy.getParent(); if (path == null) { path = ""; } else { path = path + File.separator; } nestedAuxCounter = -1; // count only the nested reads // index of current file in list int fileIndex = 0; while (fileIndex < fileList.size()) { String fName = fileList.get(fileIndex); try (BufferedReader br = new BufferedReader(new FileReader(fName))) { cont = true; while (cont) { Optional<String> 
maybeLine; try { maybeLine = Optional.ofNullable(br.readLine()); } catch (IOException ioe) { maybeLine = Optional.empty(); } if (maybeLine.isPresent()) { String line = maybeLine.get(); matcher = TAG_PATTERN.matcher(line); while (matcher.find()) { // extract the bibtex-key(s) XXX from \citation{XXX} string int len = matcher.end() - matcher.start(); if (len > 11) { String str = matcher.group(2); // could be an comma separated list of keys String[] keys = str.split(","); if (keys != null) { for (String dummyStr : keys) { if (dummyStr != null) { // delete all unnecessary blanks and save key into an set mySet.add(dummyStr.trim()); } } } } } // try to find a nested aux file int index = line.indexOf("\\@input{"); if (index >= 0) { int start = index + 8; int end = line.indexOf('}', start); if (end > start) { String str = path + line.substring(index + 8, end); // if filename already in file list if (!fileList.contains(str)) { fileList.add(str); // insert file into file list } } } } else { cont = false; } } nestedAuxCounter++; } catch (FileNotFoundException e) { LOGGER.info("Cannot locate input file!", e); } catch (IOException e) { LOGGER.warn("Problem opening file!", e); } fileIndex++; // load next file } return back; }
From source file:au.org.ala.names.search.ALANameSearcher.java
/** * Update the rank for the name based on it containing rank strings. * Provides a bit of a sanity check on the name matching. If we expect a * species we don't want to match on a genus * * @param name/*from w w w . j av a 2s . co m*/ * @param rank */ private RankType getUpdatedRank(String name, RankType rank) { Matcher matcher = RANK_MARKER.matcher(name); if (matcher.find()) { String value = name.substring(matcher.start(), matcher.end()); log.debug("Changing rank to : " + value); if (value.endsWith(".")) rank = RankType.getForCBRank(Rank.RANK_MARKER_MAP.get(value.substring(1, value.length() - 1))); log.debug("Using the new rank " + rank); } return rank; }
From source file:fr.gouv.culture.thesaurus.service.impl.SesameThesaurus.java
/**
 * Abbreviates a label by returning only the first occurrence of the searched text,
 * together with its surrounding context and with the found terms highlighted.
 * If no occurrence is found, the first part of the label is returned instead.
 *
 * @param matchingLabel
 *            label matching the query
 * @param queryPattern
 *            original query as a regular expression
 * @return first occurrence of the searched text with its context and HTML highlighting
 * @throws IllegalArgumentException
 *             if the configured occurrence width leaves less than one character for
 *             the highlighted part once both contexts are subtracted
 */
private String abbreviateAndHighlightMatchingLabel(String matchingLabel, Pattern queryPattern) {
    final Matcher occurrence = queryPattern.matcher(matchingLabel);
    final int totalWidth = configuration.getMatchingLabelFirstOccurrenceWidth();

    if (!occurrence.find()) {
        /*
         * For some reason the terms found by the search cannot be located in the
         * text as processed by Java. Return the beginning of the matching label.
         */
        final String leadingPart = TextUtils.leftAbbreviateOnWords(matchingLabel, totalWidth);
        return StringEscapeUtils.escapeHtml4(leadingPart);
    }

    final int contextWidth = configuration.getMatchingLabelContextLength();
    // what remains for the highlighted part once the context on both sides is reserved
    final int highlightWidth = totalWidth - 2 * contextWidth;
    if (highlightWidth < 1) {
        throw new IllegalArgumentException(
                "Invalid configuration: the occurrence width is not long enough to hold the highlighted part and the context.");
    }

    return TextUtils.htmlHighlightOccurrence(matchingLabel, occurrence.start(), occurrence.end(),
            highlightWidth, contextWidth, "<em>", "</em>");
}
From source file:tr.edu.gsu.nerwip.retrieval.reader.wikipedia.WikipediaReader.java
/** * Retrieces the category of the article * from its content, and more particularly * from its first sentence, which generally * takes the following form in biographies: * "Firstname Lastname (19xx-19xx) was/is a politician/artist/etc." * /* ww w . ja va2 s . c o m*/ * @param article * Article to be processed. * @return * The identified categories, possibly an empty list if none * could be identified. */ public List<ArticleCategory> getArticleCategoriesFromContent(Article article) { logger.log("Using the article content to retrieve its categories"); Set<ArticleCategory> categories = new TreeSet<ArticleCategory>(); logger.increaseOffset(); // get first sentence String text = article.getRawText(); String firstSentence = null; Pattern pattern = Pattern.compile("[a-zA-Z0-9]{3,}\\. "); Matcher matcher = pattern.matcher(text); if (!matcher.find()) logger.log("Could not find the first sentence of the article"); else { int i = matcher.end(); firstSentence = text.substring(0, i); logger.log("First sentence of the article: \"" + firstSentence + "\""); // identify state verb (to be) int index = firstSentence.length(); String verb = null; for (String v : STATE_VERBS) { pattern = Pattern.compile("[^a-zA-Z0-9]" + v + "[^a-zA-Z0-9]"); matcher = pattern.matcher(firstSentence); if (matcher.find()) { i = matcher.start(); if (i > -1 && i < index) { index = i; verb = v; } } } if (verb == null) logger.log("WARNING: could not find any state verb in the first sentence"); else { logger.log("State verb detected in the sentence: '" + verb + "'"); // look for key words located in the second part of the sentence (after the verb) firstSentence = firstSentence.substring(index + verb.length()); logger.log("Focusing on the end of the sentence: \"" + firstSentence + "\""); logger.increaseOffset(); String temp[] = firstSentence.split("[^a-zA-Z0-9]"); for (String key : temp) { if (!key.isEmpty()) { ArticleCategory cat = CONTENT_CONVERSION_MAP.get(key); if (cat == null) { logger.log(key + ": no 
associated category"); } else { categories.add(cat); logger.log(key + ": category " + cat); } } } logger.decreaseOffset(); } } List<ArticleCategory> result = new ArrayList<ArticleCategory>(categories); Collections.sort(result); logger.decreaseOffset(); logger.log("detected categories: " + result.toString()); return result; }
From source file:com.aliyun.odps.conf.Configuration.java
private String substituteVars(String expr) { if (expr == null) { return null; }//from www .jav a 2 s. c o m Matcher match = varPat.matcher(""); String eval = expr; for (int s = 0; s < MAX_SUBST; s++) { match.reset(eval); if (!match.find()) { return eval; } String var = match.group(); var = var.substring(2, var.length() - 1); // remove ${ .. } String val = null; try { val = System.getProperty(var); } catch (SecurityException se) { LOG.warn("No permission to get system property: " + var); } if (val == null) { val = getRaw(var); } if (val == null) { return eval; // return literal ${var}: var is unbound } // substitute eval = eval.substring(0, match.start()) + val + eval.substring(match.end()); } throw new IllegalStateException("Variable substitution depth too large: " + MAX_SUBST + " " + expr); }
From source file:cn.suishen.email.activity.MessageViewFragmentBase.java
/** * Reload the body from the provider cursor. This must only be called from the UI thread. * * @param bodyText text part//from ww w. j av a2s.c o m * @param bodyHtml html part * * TODO deal with html vs text and many other issues <- WHAT DOES IT MEAN?? */ private void reloadUiFromBody(String bodyText, String bodyHtml, boolean autoShowPictures) { String text = null; mHtmlTextRaw = null; boolean hasImages = false; if (bodyHtml == null) { text = bodyText; /* * Convert the plain text to HTML */ StringBuffer sb = new StringBuffer("<html><body>"); if (text != null) { // Escape any inadvertent HTML in the text message text = EmailHtmlUtil.escapeCharacterToDisplay(text); // Find any embedded URL's and linkify Matcher m = Patterns.WEB_URL.matcher(text); while (m.find()) { int start = m.start(); /* * WEB_URL_PATTERN may match domain part of email address. To detect * this false match, the character just before the matched string * should not be '@'. */ if (start == 0 || text.charAt(start - 1) != '@') { String url = m.group(); Matcher proto = WEB_URL_PROTOCOL.matcher(url); String link; if (proto.find()) { // This is work around to force URL protocol part be lower case, // because WebView could follow only lower case protocol link. link = proto.group().toLowerCase() + url.substring(proto.end()); } else { // Patterns.WEB_URL matches URL without protocol part, // so added default protocol to link. link = "http://" + url; } String href = String.format("<a href=\"%s\">%s</a>", link, url); m.appendReplacement(sb, href); } else { m.appendReplacement(sb, "$0"); } } m.appendTail(sb); } sb.append("</body></html>"); text = sb.toString(); } else { text = bodyHtml; mHtmlTextRaw = bodyHtml; hasImages = IMG_TAG_START_REGEX.matcher(text).find(); } // TODO this is not really accurate. // - Images aren't the only network resources. (e.g. CSS) // - If images are attached to the email and small enough, we download them at once, // and won't need network access when they're shown. 
if (hasImages) { if (mRestoredPictureLoaded || autoShowPictures) { blockNetworkLoads(false); addTabFlags(TAB_FLAGS_PICTURE_LOADED); // Set for next onSaveInstanceState // Make sure to reset the flag -- otherwise this will keep taking effect even after // moving to another message. mRestoredPictureLoaded = false; } else { addTabFlags(TAB_FLAGS_HAS_PICTURES); } } setMessageHtml(text); // Ask for attachments after body new LoadAttachmentsTask().executeParallel(mMessage.mId); mIsMessageLoadedForTest = true; }
From source file:de.dfki.iui.opentok.cordova.plugin.OpenTokPlugin.java
private int extractDp(JSONArray args, int index) throws JSONException { JSONException failure = null;/*from w ww. j a va2 s . c o m*/ int result = 0; try { result = args.getInt(index); } catch (JSONException e) { failure = e; } if (failure != null) { String temp = args.getString(index); //simple decimal number pattern Pattern p = Pattern.compile("([+-]?\\d+([,.]\\d+)?)"); Matcher m = p.matcher(temp); if (m.find()) { result = Integer.parseInt(temp.substring(m.start(), m.end())); } } return result; }