List of usage examples for java.util.regex Pattern DOTALL
int DOTALL
To view the source code for java.util.regex Pattern DOTALL, click the Source Link.
From source file:org.apache.hadoop.hive.serde2.RegexSerDe.java
@Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { // We can get the table definition from tbl. // Read the configuration parameters inputRegex = tbl.getProperty(INPUT_REGEX); String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE)); // output format string is not supported anymore, warn user of deprecation if (null != tbl.getProperty("output.format.string")) { LOG.warn("output.format.string has been deprecated"); }//from www. ja va 2 s . c o m // Parse the configuration parameters if (inputRegex != null) { inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL + (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0)); } else { inputPattern = null; throw new SerDeException("This table does not have serde property \"input.regex\"!"); } List<String> columnNames = Arrays.asList(columnNameProperty.split(",")); columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); assert columnNames.size() == columnTypes.size(); numColumns = columnNames.size(); /* Constructing the row ObjectInspector: * The row consists of some set of primitive columns, each column will * be a java object of primitive type. */ List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size()); for (int c = 0; c < numColumns; c++) { TypeInfo typeInfo = columnTypes.get(c); if (typeInfo instanceof PrimitiveTypeInfo) { PrimitiveTypeInfo pti = (PrimitiveTypeInfo) columnTypes.get(c); AbstractPrimitiveJavaObjectInspector oi = PrimitiveObjectInspectorFactory .getPrimitiveJavaObjectInspector(pti); columnOIs.add(oi); } else { throw new SerDeException(getClass().getName() + " doesn't allow column [" + c + "] named " + columnNames.get(c) + " with type " + columnTypes.get(c)); } } // StandardStruct uses ArrayList to store the row. 
rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs, Lists.newArrayList(Splitter.on('\0').split(tbl.getProperty("columns.comments")))); row = new ArrayList<Object>(numColumns); // Constructing the row object, etc, which will be reused for all rows. for (int c = 0; c < numColumns; c++) { row.add(null); } outputFields = new Object[numColumns]; outputRowText = new Text(); }
From source file:de.ist.clonto.webwiki.InfoboxParser.java
private String replaceHTMLComments(String text) { text = Pattern.compile("<!--.*?-->", Pattern.MULTILINE | Pattern.DOTALL).matcher(text).replaceAll(""); return text;// w w w .j a v a2s . com }
From source file:org.opennms.web.rest.v1.AcknowledgmentRestServiceIT.java
/**
 * Acknowledges notification 1 through the REST API and checks that the notification
 * XML then carries an {@code <answeredBy>} element with the value "admin"; afterwards
 * un-acknowledges it and checks that the element is no longer matched.
 *
 * @throws Exception if a request fails
 */
@Test
@JUnitTemporaryDatabase
public void testAcknowlegeNotification() throws Exception {
    // Regex flags are bit masks and must be combined with bitwise OR. The previous
    // "DOTALL & MULTILINE" evaluated to 0, silently disabling both flags, so the
    // pattern could not match a response containing line terminators.
    final Pattern p = Pattern.compile("^.*<answeredBy>(.*?)</answeredBy>.*$",
            Pattern.DOTALL | Pattern.MULTILINE);

    // Acknowledge, then expect the acknowledging user in the notification XML.
    sendData(POST, MediaType.APPLICATION_FORM_URLENCODED, "/acks", "notifId=1&action=ack", 200);
    String xml = sendRequest(GET, "/notifications/1", new HashMap<String, String>(), 200);
    Matcher m = p.matcher(xml);
    assertTrue(m.matches());
    assertTrue(m.group(1).equals("admin"));

    // Un-acknowledge, then expect no <answeredBy> element at all.
    sendData(POST, MediaType.APPLICATION_FORM_URLENCODED, "/acks", "notifId=1&action=unack", 200);
    xml = sendRequest(GET, "/notifications/1", new HashMap<String, String>(), 200);
    m = p.matcher(xml);
    assertFalse(m.matches());
}
From source file:org.apache.hadoop.hive.contrib.serde2.RegexSerDe.java
/**
 * Configures this SerDe from the table properties.
 *
 * <p>Reads the input regex, the output format string, the column names and the column
 * types from {@code tbl}. When an input regex is present it is compiled with
 * {@link Pattern#DOTALL} (plus {@link Pattern#CASE_INSENSITIVE} when the
 * {@code INPUT_REGEX_CASE_SENSITIVE} property equals "true", ignoring case); otherwise
 * the pattern is left {@code null}. Every column must be of string type.
 *
 * @param conf the configuration; not read by this method
 * @param tbl  the table properties
 * @throws SerDeException if any column is not of string type
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // Pull the raw settings out of the table definition.
    inputRegex = tbl.getProperty(INPUT_REGEX);
    outputFormatString = tbl.getProperty(OUTPUT_FORMAT_STRING);
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final boolean ignoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));

    // The regex is optional here: no regex simply means no input pattern.
    if (inputRegex == null) {
        inputPattern = null;
    } else {
        final int regexFlags = ignoreCase ? (Pattern.DOTALL | Pattern.CASE_INSENSITIVE) : Pattern.DOTALL;
        inputPattern = Pattern.compile(inputRegex, regexFlags);
    }

    final List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
    final List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();

    // Only string columns are accepted; each one is exposed as a java String object.
    final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        if (!columnTypes.get(c).equals(TypeInfoFactory.stringTypeInfo)) {
            throw new SerDeException(getClass().getName() + " only accepts string columns, but column[" + c
                    + "] named " + columnNames.get(c) + " has type " + columnTypes.get(c));
        }
        columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);

    // The row container and output buffers are created once and reused for all rows.
    row = new ArrayList<String>(numColumns);
    for (int filled = 0; filled < numColumns; filled++) {
        row.add(null);
    }
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
From source file:org.openrdf.repository.sparql.query.SPARQLQuery.java
protected Set<String> getBindingNames() { if (bindings.size() == 0) return Collections.EMPTY_SET; Set<String> names = new HashSet<String>(); String qry = query;//from ww w . j ava2 s. c o m int b = qry.indexOf('{'); String select = qry.substring(0, b); for (String name : bindings.getBindingNames()) { String replacement = getReplacement(bindings.getValue(name)); if (replacement != null) { String pattern = ".*[\\?\\$]" + name + "\\W.*"; if (Pattern.compile(pattern, Pattern.MULTILINE | Pattern.DOTALL).matcher(select).matches()) { names.add(name); } } } return names; }
From source file:fr.dudie.acrachilisync.tools.upgrade.IssueDescriptionReaderV1.java
/** * Extracts the list of bug occurrences from the description. * //from w w w . ja va 2s . co m * @param pDescription * the issue description * @param pStacktraceMD5 * the stacktrace MD5 hash the issue is related to * @return the ACRA bug occurrences listed in the description * @throws IssueParseException * malformed issue description */ private Map<String, Date> parseAcraOccurrencesTable(final String pDescription, final String pStacktraceMD5) throws IssueParseException { final Map<String, Date> occur = new HashMap<String, Date>(); // escape braces { and } to use strings in regexp final String header = IssueDescriptionUtilsV1.getOccurrencesTableHeader(); final String escHeader = Pattern.quote(header); // regexp to find occurrences tables final Pattern p = Pattern.compile(escHeader + IssueDescriptionUtilsV1.EOL + "(?:" + OCCURR_LINE_PATTERN + IssueDescriptionUtilsV1.EOL + "+)+", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); final Matcher m = p.matcher(pDescription); if (m.find()) { // regexp to find occurrences lines final Pattern pLine = Pattern.compile(OCCURR_LINE_PATTERN); final Matcher mLine = pLine.matcher(m.group()); while (mLine.find()) { final StringTokenizer line = new StringTokenizer(mLine.group(), "|"); final String acraReportId = line.nextToken(); final String acraUserCrashDate = line.nextToken(); try { occur.put(acraReportId, IssueDescriptionUtilsV1.parseDate(acraUserCrashDate)); } catch (final ParseException e) { throw new IssueParseException("Unable to parse user crash date of ACRA report " + acraReportId, e); } } } else { throw new IssueParseException("No crash occurrence table found in the description"); } if (m.find()) { throw new IssueParseException("More than 1 occurrence table found in the description"); } if (MapUtils.isEmpty(occur)) { throw new IssueParseException("0 user crash occurrence found in the description"); } return occur; }
From source file:org.languagetool.tools.RuleAsXmlSerializerTest.java
/**
 * Serializes a single rule match to XML and checks the XML declaration, the opening
 * {@code <matches>} element and the generated {@code <error>} element.
 *
 * @throws IOException if serialization fails
 */
@Test
public void testRuleMatchesToXML() throws IOException {
    final String text = "This is an test sentence. Here's another sentence with more text.";

    // One match covering characters 8..10, with explicit line and column positions.
    final RuleMatch ruleMatch = new RuleMatch(new FakeRule(), 8, 10, "myMessage");
    ruleMatch.setColumn(99);
    ruleMatch.setEndColumn(100);
    ruleMatch.setLine(44);
    ruleMatch.setEndLine(45);
    final List<RuleMatch> matches = new ArrayList<>();
    matches.add(ruleMatch);

    final String xml = SERIALIZER.ruleMatchesToXml(matches, text, 5, NORMAL_XML, LANG,
            Collections.<String>emptyList());

    assertTrue(xml.startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"));

    // DOTALL lets the leading and trailing ".*" run across the line breaks in the XML.
    final String matchesRegex = ".*<matches software=\"LanguageTool\" version=\"" + JLanguageTool.VERSION
            + "\" buildDate=\".*?\">.*";
    final boolean matchesElementFound = Pattern.compile(matchesRegex, Pattern.DOTALL).matcher(xml).matches();
    assertTrue("Did not find expected '<matches>' element, got: " + xml, matchesElementFound);

    final String expectedTail = ">\n"
            + "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" ruleId=\"FAKE_ID\" msg=\"myMessage\" "
            + "replacements=\"\" context=\"...s is an test...\" contextoffset=\"8\" offset=\"8\" errorlength=\"2\" "
            + "locqualityissuetype=\"misspelling\"/>\n"
            + "</matches>\n";
    assertTrue(xml.contains(expectedTail));
}
From source file:stroom.xml.converter.ds3.ConfigFilter.java
/**
 * Handles the start of a configuration element by creating the matching node factory
 * and pushing it onto {@code parentDeque}, then forwards the event to the superclass.
 *
 * <p>The data-splitter root element marks the filter as being inside the root and,
 * when the current parent is the root factory, configures its buffer size and
 * ignore-errors setting. Any other element seen before the root element is rejected.
 * Element names that match none of the known elements are forwarded unchanged.
 *
 * @param uri       the namespace URI
 * @param localName the local element name, used to select the node type
 * @param qName     the qualified element name
 * @param atts      the element attributes
 * @throws SAXException if an element other than the root element is seen first
 */
@Override
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
    if (localName.equals(XML_ELEMENT_DATA_SPLITTER)) {
        inRoot = true;

        if (parentDeque.peek().getNodeType() == NodeType.ROOT) {
            final RootFactory root = (RootFactory) parentDeque.peek();
            root.setBufferSize(getInt(atts, XML_ATTRIBUTE_BUFFER_SIZE, RootFactory.DEFAULT_BUFFER_SIZE));
            root.setIgnoreErrors(getIgnoreErorrs(atts));
        }
    } else {
        if (!inRoot) {
            throw new SAXException("Unknown root element \"" + localName + "\"");
        }

        if (localName.equals(XML_ELEMENT_SPLIT)) {
            final String id = atts.getValue(XML_ATTRIBUTE_ID);
            final String delimiter = unescape(atts.getValue(XML_ATTRIBUTE_DELIMITER));
            final String escape = atts.getValue(XML_ATTRIBUTE_ESCAPE);
            final String containerStart = atts.getValue(XML_ATTRIBUTE_CONTAINER_START);
            final String containerEnd = atts.getValue(XML_ATTRIBUTE_CONTAINER_END);

            parentDeque.push(new SplitFactory(parentDeque.peek(), id, getMinMatch(atts), getMaxMatch(atts),
                    getOnlyMatch(atts), delimiter, escape, containerStart, containerEnd));

        } else if (localName.equals(XML_ELEMENT_REGEX)) {
            final String id = atts.getValue(XML_ATTRIBUTE_ID);
            final String pattern = atts.getValue(XML_ATTRIBUTE_PATTERN);
            final boolean caseInsensitive = getBool(atts, XML_ATTRIBUTE_CASE_INSENSITIVE, false);
            final boolean dotAll = getBool(atts, XML_ATTRIBUTE_DOT_ALL, false);
            final int advance = getInt(atts, XML_ATTRIBUTE_ADVANCE, -1);

            // Regex flags are independent bit masks, so they are combined with OR.
            final int flags = (caseInsensitive ? Pattern.CASE_INSENSITIVE : 0)
                    | (dotAll ? Pattern.DOTALL : 0);

            parentDeque.push(new RegexFactory(parentDeque.peek(), id, getMinMatch(atts), getMaxMatch(atts),
                    getOnlyMatch(atts), advance, pattern, flags));

        } else if (localName.equals(XML_ELEMENT_ALL)) {
            final String id = atts.getValue(XML_ATTRIBUTE_ID);
            parentDeque.push(new AllFactory(parentDeque.peek(), id));

        } else if (localName.equals(XML_ELEMENT_GROUP)) {
            final String id = atts.getValue(XML_ATTRIBUTE_ID);
            final String value = atts.getValue(XML_ATTRIBUTE_VALUE);
            parentDeque.push(new GroupFactory(parentDeque.peek(), id, value, getReverse(atts),
                    getMatchOrder(atts), getIgnoreErorrs(atts)));

        } else if (localName.equals(XML_ELEMENT_VAR)) {
            final String id = atts.getValue(XML_ATTRIBUTE_ID);
            parentDeque.push(new VarFactory(parentDeque.peek(), id));

        } else if (localName.equals(XML_ELEMENT_DATA)) {
            final String id = atts.getValue(XML_ATTRIBUTE_ID);
            final String name = atts.getValue(XML_ATTRIBUTE_NAME);
            final String value = atts.getValue(XML_ATTRIBUTE_VALUE);
            parentDeque.push(new DataFactory(parentDeque.peek(), id, name, value));
        }
    }

    super.startElement(uri, localName, qName, atts);
}
From source file:org.geotools.data.couchdb.client.CouchDBUtils.java
/**
 * Removes every {@code /* ... *}{@code /} block comment from the given JSON text.
 *
 * <p>The match is non-greedy and {@link Pattern#DOTALL} is set, so a comment may span
 * several lines and separate comments are removed individually.
 *
 * @param json the JSON text, possibly containing block comments
 * @return the text with all block comments removed
 */
public static String stripComments(String json) {
    return Pattern.compile("/\\*(?:.)*?\\*/", Pattern.MULTILINE | Pattern.DOTALL)
            .matcher(json)
            .replaceAll("");
}
From source file:net.riezebos.thoth.configuration.persistence.dbs.DDLExecuter.java
protected String applyDialect(String command) { String timestampRe = "\\{fn CURRENT_TIMESTAMP\\}"; command = command.replaceAll(timestampRe, databaseIdiom.getCurrentTimeDDL()); // Only translate create/alter statements boolean createOrAlter = command.toLowerCase().startsWith("create") || command.toLowerCase().startsWith("alter") || command.toLowerCase().startsWith("rename") || command.toLowerCase().startsWith("drop"); if (!createOrAlter) return command; for (TranslationPair wa : translations) { if (!wa.exclude(command)) { command = ThothUtil.replaceWord(command + " ", wa.getFrom(), wa.getTo(), SEPARATORS).trim(); }/*w w w. java2s . c o m*/ } for (TranslationPair wa : workarounds) { Pattern pattern = Pattern.compile(wa.getFrom(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL); command = pattern.matcher(command).replaceAll(wa.getTo()); } command = databaseIdiom.applyDialect(command); return command; }