List of usage examples for java.util.regex.Matcher.regionStart()
public int regionStart()
From source file:com.application.utils.FastDateParser.java
/**
 * Initialize derived fields from defining fields.
 * This is called from constructor and from readObject (de-serialization)
 *
 * <p>Walks {@code pattern} one format field at a time using {@code formatPattern},
 * asks the strategy of each field to contribute to a combined regular expression,
 * and stores the contributing strategies in {@code strategies} and the compiled
 * expression in {@code parsePattern}. While a field is being processed,
 * {@code currentFormatField} and {@code nextStrategy} hold the look-ahead state that
 * {@code addRegex} implementations may consult.
 *
 * @param definingCalendar the {@link java.util.Calendar} instance used to initialize this FastDateParser
 * @throws IllegalArgumentException if the pattern is empty, starts with an illegal
 *         character, or cannot be consumed completely
 */
private void init(Calendar definingCalendar) {
    final StringBuilder regex = new StringBuilder();
    final List<Strategy> collector = new ArrayList<Strategy>();

    final Matcher patternMatcher = formatPattern.matcher(pattern);
    if (!patternMatcher.lookingAt()) {
        final int failedAt = patternMatcher.regionStart();
        // An empty pattern has no character to report; without this guard charAt()
        // would throw StringIndexOutOfBoundsException instead of the intended error.
        if (failedAt >= pattern.length()) {
            throw new IllegalArgumentException(
                    "Illegal pattern \"" + pattern + "\" ; no format field found at index " + failedAt);
        }
        throw new IllegalArgumentException(
                "Illegal pattern character '" + pattern.charAt(failedAt) + "'");
    }

    currentFormatField = patternMatcher.group();
    Strategy currentStrategy = getStrategy(currentFormatField, definingCalendar);
    for (;;) {
        // Move the region past the field just matched and try to match the next one.
        patternMatcher.region(patternMatcher.end(), patternMatcher.regionEnd());
        if (!patternMatcher.lookingAt()) {
            nextStrategy = null;
            break;
        }
        final String nextFormatField = patternMatcher.group();
        nextStrategy = getStrategy(nextFormatField, definingCalendar);
        // The current strategy is added only after nextStrategy is known.
        if (currentStrategy.addRegex(this, regex)) {
            collector.add(currentStrategy);
        }
        currentFormatField = nextFormatField;
        currentStrategy = nextStrategy;
    }

    // The loop stopped before the end of the pattern: something could not be tokenized.
    if (patternMatcher.regionStart() != patternMatcher.regionEnd()) {
        throw new IllegalArgumentException(
                "Failed to parse \"" + pattern + "\" ; gave up at index " + patternMatcher.regionStart());
    }

    // The last field has no successor; add it with nextStrategy == null.
    if (currentStrategy.addRegex(this, regex)) {
        collector.add(currentStrategy);
    }
    currentFormatField = null;
    strategies = collector.toArray(new Strategy[collector.size()]);
    parsePattern = Pattern.compile(regex.toString());
}
From source file:au.org.ala.biocache.dao.SearchDAOImpl.java
/** * Format the search input query for a full-text search. * * This includes constructing a user friendly version of the query to * be used for display purposes./*from w ww . j a v a2 s.c om*/ * * TODO Fix this to use a state. REVISE!! * * @param searchParams */ protected void formatSearchQuery(SpatialSearchRequestParams searchParams, boolean forceQueryFormat) { //Only format the query if it doesn't already supply a formattedQuery. if (forceQueryFormat || StringUtils.isEmpty(searchParams.getFormattedQuery())) { // set the query String query = searchParams.getQ(); //cached query parameters are already formatted if (query.contains("qid:")) { Matcher matcher = qidPattern.matcher(query); long qid = 0; while (matcher.find()) { String value = matcher.group(); try { String qidValue = SearchUtils.stripEscapedQuotes(value.substring(4)); qid = Long.parseLong(qidValue); ParamsCacheObject pco = ParamsCache.get(qid); if (pco != null) { searchParams.setQId(qid); searchParams.setQ(pco.getQ()); //add the fqs from the params cache if (pco.getFqs() != null) { String[] currentFqs = searchParams.getFq(); if (currentFqs == null || (currentFqs.length == 1 && currentFqs[0].length() == 0)) { searchParams.setFq(pco.getFqs()); } else { //we need to add the current Fqs together searchParams.setFq((String[]) ArrayUtils.addAll(currentFqs, pco.getFqs())); } } String displayString = pco.getDisplayString(); if (StringUtils.isNotEmpty(pco.getWkt())) { displayString = displayString + " within user defined polygon"; } searchParams.setDisplayString(displayString); if (searchParams instanceof SpatialSearchRequestParams) { ((SpatialSearchRequestParams) searchParams).setWkt(pco.getWkt()); } else if (StringUtils.isNotEmpty(pco.getWkt())) { String originalQ = searchParams.getQ(); searchParams.setQ(spatialField + ":\"Intersects(" + pco.getWkt() + ")"); if (StringUtils.isNotEmpty(originalQ)) searchParams.setQ(searchParams.getQ() + " AND " + originalQ); } searchParams.setFormattedQuery(searchParams.getQ()); 
return; } } catch (NumberFormatException e) { } catch (ParamsCacheMissingException e) { } } } StringBuffer queryString = new StringBuffer(); StringBuffer displaySb = new StringBuffer(); String displayString = query; // look for field:term sub queries and catch fields: matched_name & matched_name_children if (query.contains(":")) { // will match foo:bar, foo:"bar bash" & foo:bar\ bash Matcher matcher = termPattern.matcher(query); queryString.setLength(0); while (matcher.find()) { String value = matcher.group(); logger.debug("term query: " + value); logger.debug("groups: " + matcher.group(1) + "|" + matcher.group(2)); if ("matched_name".equals(matcher.group(1))) { // name -> accepted taxon name (taxon_name:) String field = matcher.group(1); String queryText = matcher.group(2); if (queryText != null && !queryText.isEmpty()) { String guid = speciesLookupService.getGuidForName(queryText.replaceAll("\"", "")); // strip any quotes logger.info("GUID for " + queryText + " = " + guid); if (guid != null && !guid.isEmpty()) { String acceptedName = speciesLookupService.getAcceptedNameForGuid(guid); // strip any quotes logger.info("acceptedName for " + queryText + " = " + acceptedName); if (acceptedName != null && !acceptedName.isEmpty()) { field = "taxon_name"; queryText = acceptedName; } } else { field = "taxon_name"; } // also change the display query displayString = displayString.replaceAll("matched_name", "taxon_name"); } if (StringUtils.containsAny(queryText, CHARS) && !queryText.startsWith("[")) { // quote any text that has spaces or colons but not range queries queryText = QUOTE + queryText + QUOTE; } logger.debug("queryText: " + queryText); matcher.appendReplacement(queryString, matcher.quoteReplacement(field + ":" + queryText)); } else if ("matched_name_children".equals(matcher.group(1))) { String field = matcher.group(1); String queryText = matcher.group(2); if (queryText != null && !queryText.isEmpty()) { String guid = 
speciesLookupService.getGuidForName(queryText.replaceAll("\"", "")); // strip any quotes logger.info("GUID for " + queryText + " = " + guid); if (guid != null && !guid.isEmpty()) { field = "lsid"; queryText = guid; } else { field = "taxon_name"; } } if (StringUtils.containsAny(queryText, CHARS) && !queryText.startsWith("[")) { // quote any text that has spaces or colons but not range queries queryText = QUOTE + queryText + QUOTE; } matcher.appendReplacement(queryString, matcher.quoteReplacement(field + ":" + queryText)); } else { matcher.appendReplacement(queryString, matcher.quoteReplacement(value)); } } matcher.appendTail(queryString); query = queryString.toString(); } //if the query string contains lsid: we will need to replace it with the corresponding lft range int last = 0; if (query.contains("lsid:")) { Matcher matcher = lsidPattern.matcher(query); queryString.setLength(0); while (matcher.find()) { //only want to process the "lsid" if it does not represent taxon_concept_lsid etc... if ((matcher.start() > 0 && query.charAt(matcher.start() - 1) != '_') || matcher.start() == 0) { String value = matcher.group(); logger.debug("preprocessing " + value); String lsid = matcher.group(2); if (lsid.contains("\"")) { //remove surrounding quotes, if present lsid = lsid.replaceAll("\"", ""); } if (lsid.contains("\\")) { //remove internal \ chars, if present //noinspection MalformedRegex lsid = lsid.replaceAll("\\\\", ""); } logger.debug("lsid = " + lsid); String[] values = searchUtils.getTaxonSearch(lsid); String lsidHeader = matcher.group(1).length() > 0 ? 
matcher.group(1) : ""; matcher.appendReplacement(queryString, lsidHeader + values[0]); displaySb.append(query.substring(last, matcher.start())); if (!values[1].startsWith("taxon_concept_lsid:")) displaySb.append(lsidHeader).append("<span class='lsid' id='").append(lsid).append("'>") .append(values[1]).append("</span>"); else displaySb.append(lsidHeader).append(values[1]); last = matcher.end(); //matcher.appendReplacement(displayString, values[1]); } } matcher.appendTail(queryString); displaySb.append(query.substring(last, query.length())); query = queryString.toString(); displayString = displaySb.toString(); } if (query.contains("urn")) { //escape the URN strings before escaping the rest this avoids the issue with attempting to search on a urn field Matcher matcher = urnPattern.matcher(query); queryString.setLength(0); while (matcher.find()) { String value = matcher.group(); logger.debug("escaping lsid urns " + value); matcher.appendReplacement(queryString, prepareSolrStringForReplacement(value)); } matcher.appendTail(queryString); query = queryString.toString(); } if (query.contains("Intersects")) { Matcher matcher = spatialPattern.matcher(query); if (matcher.find()) { String spatial = matcher.group(); SpatialSearchRequestParams subQuery = new SpatialSearchRequestParams(); logger.debug("region Start : " + matcher.regionStart() + " start : " + matcher.start() + " spatial length " + spatial.length() + " query length " + query.length()); //format the search query of the remaining text only subQuery.setQ(query.substring(matcher.start() + spatial.length(), query.length())); //format the remaining query formatSearchQuery(subQuery); //now append Q's together queryString.setLength(0); //need to include the prefix queryString.append(query.substring(0, matcher.start())); queryString.append(spatial); queryString.append(subQuery.getFormattedQuery()); searchParams.setFormattedQuery(queryString.toString()); //add the spatial information to the display string if 
(spatial.contains("circles")) { String[] values = spatial.substring(spatial.indexOf("=") + 1, spatial.indexOf("}")) .split(","); if (values.length == 3) { displaySb.setLength(0); displaySb.append(subQuery.getDisplayString()); displaySb.append(" - within ").append(values[2]).append(" km of point(") .append(values[0]).append(",").append(values[1]).append(")"); searchParams.setDisplayString(displaySb.toString()); } } else { searchParams.setDisplayString(subQuery.getDisplayString() + " - within supplied region"); } } } else { //escape reserved characters unless the colon represnts a field name colon queryString.setLength(0); Matcher matcher = spacesPattern.matcher(query); while (matcher.find()) { String value = matcher.group(); //special cases to ignore from character escaping //if the value is a single - or * it means that we don't want to escape it as it is likely to have occurred in the following situation -(occurrence_date:[* TO *]) or *:* if (!value.equals("-") && /*!value.equals("*") && !value.equals("*:*") && */ !value.endsWith("*")) { //split on the colon String[] bits = StringUtils.split(value, ":", 2); if (bits.length == 2) { if (!bits[0].contains("urn") && !bits[1].contains("urn\\")) matcher.appendReplacement(queryString, bits[0] + ":" + prepareSolrStringForReplacement(bits[1])); } else if (!value.endsWith(":")) { //need to ignore field names where the : is at the end because the pattern matching will return field_name: as a match when it has a double quoted value //default behaviour is to escape all matcher.appendReplacement(queryString, prepareSolrStringForReplacement(value)); } } } matcher.appendTail(queryString); //substitute better display strings for collection/inst etc searches if (displayString.contains("_uid")) { displaySb.setLength(0); String normalised = displayString.replaceAll("\"", ""); matcher = uidPattern.matcher(normalised); while (matcher.find()) { String newVal = "<span>" + searchUtils.getUidDisplayString(matcher.group(1), 
matcher.group(2)) + "</span>"; if (newVal != null) matcher.appendReplacement(displaySb, newVal); } matcher.appendTail(displaySb); displayString = displaySb.toString(); } if (searchParams.getQ().equals("*:*")) { displayString = "[all records]"; } if (searchParams.getLat() != null && searchParams.getLon() != null && searchParams.getRadius() != null) { displaySb.setLength(0); displaySb.append(displayString); displaySb.append(" - within ").append(searchParams.getRadius()).append(" km of point(") .append(searchParams.getLat()).append(",").append(searchParams.getLon()).append(")"); displayString = displaySb.toString(); } // substitute i18n version of field name, if found in messages.properties displayString = formatDisplayStringWithI18n(displayString); searchParams.setFormattedQuery(queryString.toString()); logger.debug("formattedQuery = " + queryString); logger.debug("displayString = " + displayString); searchParams.setDisplayString(displayString); } //format the fq's for facets that need ranges substituted for (int i = 0; i < searchParams.getFq().length; i++) { String fq = searchParams.getFq()[i]; String[] parts = fq.split(":", 2); //check to see if the first part is a range based query and update if necessary Map<String, String> titleMap = RangeBasedFacets.getTitleMap(parts[0]); if (titleMap != null) { searchParams.getFq()[i] = titleMap.get(parts[1]); } } } searchParams.setDisplayString(formatDisplayStringWithI18n(searchParams.getDisplayString())); }
From source file:org.sqlite.date.FastDateParser.java
/** * Initialize derived fields from defining fields. * This is called from constructor and from readObject (de-serialization) * * @param definingCalendar the {@link java.util.Calendar} instance used to initialize this FastDateParser *///w ww . j ava 2s . c o m private void init(final Calendar definingCalendar) { final StringBuilder regex = new StringBuilder(); final List<Strategy> collector = new ArrayList<Strategy>(); final Matcher patternMatcher = formatPattern.matcher(pattern); if (!patternMatcher.lookingAt()) { throw new IllegalArgumentException( "Illegal pattern character '" + pattern.charAt(patternMatcher.regionStart()) + "'"); } currentFormatField = patternMatcher.group(); Strategy currentStrategy = getStrategy(currentFormatField, definingCalendar); for (;;) { patternMatcher.region(patternMatcher.end(), patternMatcher.regionEnd()); if (!patternMatcher.lookingAt()) { nextStrategy = null; break; } final String nextFormatField = patternMatcher.group(); nextStrategy = getStrategy(nextFormatField, definingCalendar); if (currentStrategy.addRegex(this, regex)) { collector.add(currentStrategy); } currentFormatField = nextFormatField; currentStrategy = nextStrategy; } if (patternMatcher.regionStart() != patternMatcher.regionEnd()) { throw new IllegalArgumentException( "Failed to parse \"" + pattern + "\" ; gave up at index " + patternMatcher.regionStart()); } if (currentStrategy.addRegex(this, regex)) { collector.add(currentStrategy); } currentFormatField = null; strategies = collector.toArray(new Strategy[collector.size()]); parsePattern = Pattern.compile(regex.toString()); }
From source file:org.xchain.framework.jsl.TemplateSourceBuilder.java
/** * Parses an attribute value template into fixed and dynamic parts. This list will always start with a fixed part and * then include alternating dynamic and fixed parts. *//* w ww . j a va 2s .co m*/ public static List<String> parseAttributeValueTemplate(String attributeValueTemplate) throws SAXException { // the result. ArrayList<String> result = new ArrayList<String>(); // create the matcher. Matcher matcher = ATTRIBUTE_VALUE_TEMPLATE_PATTERN.matcher(attributeValueTemplate); while (matcher.find()) { String fixedPart = matcher.group(1); String dynamicPart = matcher.group(2); if (result.isEmpty() && fixedPart == null) { result.add(""); } if (fixedPart != null) { result.add(fixedPart.replaceAll("\\{\\{", "{").replaceAll("\\}\\}", "}")); } if (dynamicPart != null) { result.add(dynamicPart); } } if (!matcher.hitEnd()) { throw new SAXException( "The attribute value template '" + attributeValueTemplate + "' has an error between characters " + matcher.regionStart() + " and " + matcher.regionEnd() + "."); } return result; }
From source file:org.xchain.framework.util.AttributesUtil.java
/** * Parses an attribute value template into fixed and dynamic parts. This list will always start with a fixed part and * then include alternating dynamic and fixed parts. *///from ww w .ja v a 2 s . c o m public static List<String> parseAttributeValueTemplate(String attributeValueTemplate) throws SAXException { // the result. ArrayList<String> result = new ArrayList<String>(); // create the matcher. Matcher matcher = attributeValueTemplatePattern.matcher(attributeValueTemplate); while (matcher.lookingAt()) { String fixedPart = matcher.group(1); String dynamicPart = matcher.group(2); if (result.isEmpty() && fixedPart == null) { result.add(""); } if (fixedPart != null) { result.add(fixedPart.replaceAll("\\{\\{", "{").replaceAll("\\}\\}", "}")); } if (dynamicPart != null) { result.add(dynamicPart); } matcher.region(matcher.regionStart() + matcher.group().length(), matcher.regionEnd()); } if (!matcher.hitEnd()) { throw new SAXException( "The attribute value template '" + attributeValueTemplate + "' has an error between characters " + matcher.regionStart() + " and " + matcher.regionEnd() + "."); } return result; }