List of usage examples for java.util.regex Pattern COMMENTS
int COMMENTS
To view the source code for java.util.regex Pattern COMMENTS, click the Source Link.
From source file:com.google.code.configprocessor.processing.ModifyAction.java
protected int parseFlags() { int flagsToUse = 0; String flagsToTest = getFlags() == null ? DEFAULT_PATTERN_FLAGS : getFlags(); String[] flagArray = StringUtils.split(flagsToTest, PATTERN_FLAG_SEPARATOR); for (String flag : flagArray) { if ("UNIX_LINES".equals(flag)) { flagsToUse |= Pattern.UNIX_LINES; } else if ("CASE_INSENSITIVE".equals(flag)) { flagsToUse |= Pattern.CASE_INSENSITIVE; } else if ("COMMENTS".equals(flag)) { flagsToUse |= Pattern.COMMENTS; } else if ("MULTILINE".equals(flag)) { flagsToUse |= Pattern.MULTILINE; } else if ("LITERAL".equals(flag)) { flagsToUse |= Pattern.LITERAL; } else if ("DOTALL".equals(flag)) { flagsToUse |= Pattern.DOTALL; } else if ("UNICODE_CASE".equals(flag)) { flagsToUse |= Pattern.UNICODE_CASE; } else if ("CANON_EQ".equals(flag)) { flagsToUse |= Pattern.CANON_EQ; } else {//from w w w. j a va2s . com throw new IllegalArgumentException("Unknown flag: " + flag); } } return flagsToUse; }
From source file:pl.otros.logview.gui.message.pattern.PropertyPatternMessageColorizer.java
/**
 * Loads the colorizer definition from a properties stream (read as UTF-8) and
 * compiles its message pattern.
 *
 * Each {@code PROP_PATTERN_*} boolean property (default {@code false}) switches on
 * the corresponding {@link Pattern} flag. Name, description, test message and
 * version are read with the defaults shown below.
 *
 * @param in stream containing the properties
 * @throws ConfigurationException if the properties cannot be loaded
 */
public void init(InputStream in) throws ConfigurationException {
    propertiesConfiguration = new PropertiesConfiguration();
    propertiesConfiguration.setDelimiterParsingDisabled(true);
    propertiesConfiguration.load(in, "UTF-8");

    configuration = new DataConfiguration(propertiesConfiguration);
    configuration.setDelimiterParsingDisabled(true);

    String regex = configuration.getString(PROP_PATTERN);

    int compileFlags = 0;
    if (configuration.getBoolean(PROP_PATTERN_CANON_EQ, false)) {
        compileFlags |= Pattern.CANON_EQ;
    }
    if (configuration.getBoolean(PROP_PATTERN_CASE_INSENSITIVE, false)) {
        compileFlags |= Pattern.CASE_INSENSITIVE;
    }
    if (configuration.getBoolean(PROP_PATTERN_COMMENTS, false)) {
        compileFlags |= Pattern.COMMENTS;
    }
    if (configuration.getBoolean(PROP_PATTERN_DOTALL, false)) {
        compileFlags |= Pattern.DOTALL;
    }
    if (configuration.getBoolean(PROP_PATTERN_LITERAL, false)) {
        compileFlags |= Pattern.LITERAL;
    }
    if (configuration.getBoolean(PROP_PATTERN_MULTILINE, false)) {
        compileFlags |= Pattern.MULTILINE;
    }
    if (configuration.getBoolean(PROP_PATTERN_UNICODE_CASE, false)) {
        compileFlags |= Pattern.UNICODE_CASE;
    }
    if (configuration.getBoolean(PROP_PATTERN_UNIX_LINES, false)) {
        compileFlags |= Pattern.UNIX_LINES;
    }

    pattern = Pattern.compile(regex, compileFlags);
    groupCount = countGroups(pattern);

    name = configuration.getString(PROP_NAME, "NAME NOT SET!");
    description = configuration.getString(PROP_DESCRIPTION, "DESCRIPTION NOT SET!");
    testMessage = configuration.getString(PROP_TEST_MESSAGE, "");
    version = configuration.getInt(PROP_VERSION, 1);
}
From source file:org.sleuthkit.autopsy.recentactivity.Util.java
/**
 * Returns the part of {@code value} that follows its last backslash.
 *
 * If {@code value} contains no backslash it is returned unchanged; if it ends with
 * a backslash the result is the empty string.
 *
 * @param value a Windows-style path or bare file name; must not be {@code null}
 * @return the text after the last {@code '\\'} character, or {@code value} itself
 * @throws NullPointerException if {@code value} is {@code null}
 */
public static String getFileName(String value) {
    // An earlier revision also ran a drive-path regex here, but its result was always
    // overwritten by the computation below. It was dead work (a Pattern compiled and
    // matched on every call), so it has been removed without changing the result.
    int lastPos = value.lastIndexOf('\\');
    return (lastPos < 0) ? value : value.substring(lastPos + 1);
}
From source file:org.sleuthkit.autopsy.recentactivity.Util.java
public static String getPath(String txt) { String path = ""; //String drive ="([a-z]:\\\\(?:[-\\w\\.\\d]+\\\\)*(?:[-\\w\\.\\d]+)?)"; // Windows drive String drive = "([a-z]:\\\\\\S.+)"; Pattern p = Pattern.compile(drive, Pattern.CASE_INSENSITIVE | Pattern.COMMENTS); Matcher m = p.matcher(txt);//from ww w.j av a 2s . c o m if (m.find()) { path = m.group(1); } else { String network = "(\\\\(?:\\\\[^:\\s?*\"<>|]+)+)"; // Windows network Pattern p2 = Pattern.compile(network, Pattern.CASE_INSENSITIVE | Pattern.DOTALL); Matcher m2 = p2.matcher(txt); if (m2.find()) { path = m2.group(1); } } return path; }
From source file:com.ikanow.aleph2.enrichment.utils.services.SimpleRegexFilterService.java
/**
 * Turns a string of single-character regex flags into the combined int mask
 * accepted by the java {@link Pattern} object.
 *
 * Recognised characters: {@code i} (CASE_INSENSITIVE), {@code x} (COMMENTS),
 * {@code s} (DOTALL), {@code m} (MULTILINE), {@code u} (UNICODE_CASE) and
 * {@code d} (UNIX_LINES). Any other character is ignored.
 *
 * @param flagsStr the flag characters, in any order
 * @return the bitwise OR of the matching {@link Pattern} constants
 */
public static int parseFlags(final String flagsStr) {
    int mask = 0;
    for (final char flag : flagsStr.toCharArray()) {
        if (flag == 'i') {
            mask |= Pattern.CASE_INSENSITIVE;
        } else if (flag == 'x') {
            mask |= Pattern.COMMENTS;
        } else if (flag == 's') {
            mask |= Pattern.DOTALL;
        } else if (flag == 'm') {
            mask |= Pattern.MULTILINE;
        } else if (flag == 'u') {
            mask |= Pattern.UNICODE_CASE;
        } else if (flag == 'd') {
            mask |= Pattern.UNIX_LINES;
        }
    }
    return mask;
}
From source file:org.montp2.m1decol.ter.preprocessing.GlobalPreProcessing.java
public Map<Integer, Integer> getMapOfInstanceArffToIdUser(String dirPath, String inArff) throws Exception { Pattern pattern = Pattern.compile("^( \\p{Print}+ ) (_(\\p{Digit}+).txt)$", Pattern.COMMENTS); Map<Integer, String> arffIds = new HashMap<Integer, String>(); for (File file : FileUtils.ls(dirPath)) { Matcher matcher = pattern.matcher(file.getAbsolutePath()); matcher.matches();// w w w. ja va 2s. c o m arffIds.put(Integer.parseInt(matcher.group(3)), InputStreamUtils.readInputStream(new BufferedInputStream(new FileInputStream(file)))); } Instances instances = WekaUtils.loadARFF(inArff); Enumeration<Instance> en = instances.enumerateInstances(); Map<Integer, Integer> arffTOIdUser = new HashMap<Integer, Integer>(); int index = 0; while (en.hasMoreElements()) { String value = en.nextElement().toString(); value = value.substring(1, value.length() - 1); String works[] = value.split("\\s"); for (Map.Entry<Integer, String> arff : arffIds.entrySet()) { String wordArff[] = arff.getValue().split("\\s"); boolean isEqual = true; if (wordArff.length == works.length) { for (int j = 0; j < wordArff.length; j++) { if (!wordArff[j].equals(works[j])) { isEqual = false; break; } } if (isEqual) { arffTOIdUser.put(index, arff.getKey()); break; } } } index++; } return arffTOIdUser; }
From source file:org.executequery.gui.importexport.ImportDelimitedWorker.java
/**
 * Runs the delimited-file import for every file returned by
 * getParent().getDataFileVector().
 *
 * For each file the method opens a reader, optionally takes the column names from the
 * first row, prepares a statement through prepareStatement(tableName, columns), then
 * reads the file line by line. Each row is split with a delimiter-aware regex compiled
 * with Pattern.COMMENTS, the values are bound with setValue(...), and the statement is
 * either executed directly or added to a batch. Commits happen when rollbackCount
 * reaches the configured rollback size, at the end of each file, or at the end of all
 * files, depending on getParent().getRollbackSize().
 *
 * Returns the process result: SUCCESS, FAILED or CANCELLED. The finally block forces
 * FAILED when no record was read or when errorCount is greater than zero, publishes
 * the counters, and closes the readers and the prepared statement.
 *
 * NOTE(review): in the InterruptedException handler the inner catch (SQLException e2)
 * reports the outer exception e rather than e2 - confirm whether that is intended.
 * NOTE(review): after verifyDate(), the process is aborted when the returned pattern
 * is NOT blank, and a blank pattern is passed on to createDateFormatter(pattern). This
 * looks inverted - verify against verifyDate()'s contract.
 * NOTE(review): row.substring(index, index + delimLength) appears able to run past the
 * end of the row when index is near row.length() - confirm.
 */
private Object doWork() { // the process result String processResult = null;/* w ww. j a va 2 s.co m*/ // are we halting on any error int onError = getParent().getOnError(); haltOnError = (onError == ImportExportProcess.STOP_TRANSFER); boolean isBatch = getParent().runAsBatchProcess(); appendProgressText("Beginning import from delimited file process..."); appendProgressText("Using connection: " + getParent().getDatabaseConnection().getName()); // --------------------------------------- // table specific counters // the table statement result int tableInsertCount = 0; // the records processed for this table int tableRowCount = 0; // the table commit count int tableCommitCount = 0; // --------------------------------------- // total import process counters // the current commit block size int commitCount = 0; // the total records inserted int totalInsertCount = 0; // the total records processed int totalRecordCount = 0; // the error count int errorCount = 0; // the current line number int lineNumber = 0; int rollbackSize = getParent().getRollbackSize(); int rollbackCount = 0; FileReader fileReader = null; BufferedReader reader = null; DateFormat dateFormat = null; try { // retrieve the import files Vector files = getParent().getDataFileVector(); int fileCount = files.size(); // whether to trim whitespace boolean trimWhitespace = getParent().trimWhitespace(); // whether this table has a date/time field boolean hasDate = false; // whether we are parsing date formats boolean parsingDates = parseDateValues(); // column names are first row boolean hasColumnNames = getParent().includeColumnNames(); // currently bound variables in the prepared statement Map<ColumnData, String> boundVariables = null; // ignored indexes of columns from the file List<Integer> ignoredIndexes = null; if (hasColumnNames) { boundVariables = new HashMap<ColumnData, String>(); ignoredIndexes = new ArrayList<Integer>(); appendProgressText("Using column names from input file's first row."); } // 
columns to be imported that are in the file Map<ColumnData, String> fileImportedColumns = new HashMap<ColumnData, String>(); // whether the data format failed (switch structure) boolean failed = false; // define the delimiter String delim = getParent().getDelimiter(); // --------------------------- // --- initialise counters --- // --------------------------- // the table's column count int columnCount = -1; // the length of each line in the file int rowLength = -1; // progress bar values int progressStatus = -1; // ongoing progress value int progressCheck = -1; // the import file size long fileSize = -1; // set the date format if (parseDateValues()) { try { dateFormat = createDateFormatter(); } catch (IllegalArgumentException e) { errorCount++; outputExceptionError("Error applying date mask", e); return FAILED; } } // record the start time start(); // setup the regex matcher for delims // ---------------------------------------------------------------- // below was the original pattern from oreilly book. // discovered issues when parsing values with quotes // in them - not only around them. /* String regex = "(?:^|\\" + delim + ") (?: \" ( (?> [^\"]*+ ) (?> \"\" [^\"]*+ )*+ ) \" | ( [^\"\\" + delim + "]*+ ) )"; Matcher matcher = Pattern.compile(regex, Pattern.COMMENTS).matcher(""); Matcher qMatcher = Pattern.compile("\"\"", Pattern.COMMENTS).matcher(""); */ // ---------------------------------------------------------------- // modified above to regex below // added the look-ahead after the close quote // and removed the quote from the last regex pattern String escapedDelim = escapeDelim(delim); String regex = "(?:^|" + escapedDelim + ") (?: \" ( (?> [^\"]*+ ) (?> \"\" [^\"]*+ )*+ ) \"(?=" + escapedDelim + "?) 
| ( [^" + escapedDelim + "]*+ ) )"; // ---------------------------------------------------------------- // changed above to the following - seems to work for now // regex pattern in full - where <delim> is the delimiter to use // \"([^\"]+?)\"<delim>?|([^<delim>]+)<delim>?|<delim> // // fixed oreilly one - not running this one // ---------------------------------------------------------------- Matcher matcher = Pattern.compile(regex, Pattern.COMMENTS).matcher(""); Matcher qMatcher = Pattern.compile("\"\"", Pattern.COMMENTS).matcher(""); // ---------------------------------------- // --- begin looping through the tables --- // ---------------------------------------- // ensure the connection has auto-commit to false conn = getConnection(); conn.setAutoCommit(false); int currentRowLength = 0; boolean insertLine = false; // the number of columns actually available in the file int filesColumnCount = 0; for (int i = 0; i < fileCount; i++) { lineNumber = 0; tableInsertCount = 0; tableCommitCount = 0; rollbackCount = 0; tableRowCount = 0; rowLength = 0; if (Thread.interrupted()) { setProgressStatus(100); throw new InterruptedException(); } tableCount++; DataTransferObject dto = (DataTransferObject) files.elementAt(i); // initialise the file object File inputFile = new File(dto.getFileName()); outputBuffer.append("---------------------------\nTable: "); outputBuffer.append(dto.getTableName()); outputBuffer.append("\nImport File: "); outputBuffer.append(inputFile.getName()); appendProgressText(outputBuffer); // setup the reader objects fileReader = new FileReader(inputFile); reader = new BufferedReader(fileReader); // retrieve the columns to be imported (or all) Vector<ColumnData> columns = getColumns(dto.getTableName()); columnCount = columns.size(); filesColumnCount = columnCount; // the wntire row read String row = null; // the current delimited value String value = null; // the ignored column count int ignoredCount = 0; // clear the file columns cache 
fileImportedColumns.clear(); // if the first row in the file has the column // names compare these with the columns selected if (hasColumnNames) { // init the bound vars cache with the selected columns boundVariables.clear(); for (int k = 0; k < columnCount; k++) { boundVariables.put(columns.get(k), VARIABLE_NOT_BOUND); } row = reader.readLine(); lineNumber++; String[] _columns = MiscUtils.splitSeparatedValues(row, delim); if (_columns != null && _columns.length > 0) { filesColumnCount = _columns.length; // -------------------------------------- // first determine if we have any columns in the // input file that were not selected for import // reset the ignored columns ignoredIndexes.clear(); // set up another list to re-add the columns in // the order in which they appear in the file. // all other columns will be added to the end Vector<ColumnData> temp = new Vector<ColumnData>(columnCount); ColumnData cd = null; int ignoredIndex = -1; for (int j = 0; j < _columns.length; j++) { ignoredIndex = j; String column = _columns[j]; for (int k = 0; k < columnCount; k++) { cd = columns.get(k); String _column = cd.getColumnName(); if (_column.equalsIgnoreCase(column)) { temp.add(cd); fileImportedColumns.put(cd, INCLUDED_COLUMN); ignoredIndex = -1; break; } } if (ignoredIndex != -1) { ignoredIndexes.add(Integer.valueOf(ignoredIndex)); } } ignoredCount = ignoredIndexes.size(); // if we didn't find any columns at all, show warning if (temp.isEmpty()) { String message = "No matching column names were " + "found within the specified file's first line.\n" + "The current file will be ignored."; outputBuffer.append(message); appendProgressWarningText(outputBuffer); int yesNo = GUIUtilities.displayYesNoDialog(message + "\nDo you wish to continue?", "Warning"); if (yesNo == JOptionPane.YES_OPTION) { continue; } else { throw new InterruptedException(); } } else { // add any other selected columns to the // end of the temp list with the columns // available in the file boolean 
addColumn = false; for (int k = 0; k < columnCount; k++) { addColumn = false; cd = columns.get(k); for (int j = 0, n = temp.size(); j < n; j++) { addColumn = true; if (temp.get(j) == cd) { addColumn = false; break; } } if (addColumn) { temp.add(cd); } } columns = temp; // note: size should not have changed } } } // otherwise just populate the columns in the file // with all the selected columns else { for (int j = 0; j < columnCount; j++) { fileImportedColumns.put(columns.get(j), INCLUDED_COLUMN); } } /* Log.debug("ignored count: " + ignoredCount); for (int j = 0; j < columnCount; j++) { Log.debug("column: " + columns.get(j)); } */ fileSize = inputFile.length(); progressStatus = 10; progressCheck = (int) (fileSize / progressStatus); // prepare the statement prepareStatement(dto.getTableName(), columns); if (parsingDates && dateFormat == null) { // check for a date data type for (int j = 0; j < columnCount; j++) { if (dateFormat == null && !hasDate) { ColumnData cd = columns.get(j); if (fileImportedColumns.containsKey(cd)) { if (cd.isDateDataType()) { hasDate = true; break; } } } } if (hasDate && dateFormat == null) { String pattern = verifyDate(); if (StringUtils.isNotBlank(pattern)) { fileReader.close(); setProgressStatus(100); throw new InterruptedException(); } dateFormat = createDateFormatter(pattern); } } rowLength = 0; while ((row = reader.readLine()) != null) { insertLine = true; lineNumber++; tableRowCount++; totalRecordCount++; if (Thread.interrupted()) { fileReader.close(); printTableResult(tableRowCount, tableCommitCount, dto.getTableName()); setProgressStatus(100); throw new InterruptedException(); } currentRowLength = row.length(); if (currentRowLength == 0) { outputBuffer.append("Line "); outputBuffer.append(lineNumber); outputBuffer.append(" contains no delimited values"); appendProgressWarningText(outputBuffer); int yesNo = GUIUtilities.displayYesNoDialog("No values provided from line " + lineNumber + " - the row is blank.\n" + "Do you wish to 
continue?", "Warning"); if (yesNo == JOptionPane.YES_OPTION) { continue; } else { throw new InterruptedException(); } } rowLength += currentRowLength; if (progressCheck < rowLength) { setProgressStatus(progressStatus); progressStatus += 10; rowLength = 0; } // reset matcher with current row matcher.reset(row); int index = 0; int lastIndex = -1; int loopIgnoredCount = 0; //Log.debug(row); for (int j = 0; j < filesColumnCount; j++) { if (matcher.find(index)) { String first = matcher.group(2); if (first != null) { value = first; } else { qMatcher.reset(matcher.group(1)); value = qMatcher.replaceAll("\""); } index = matcher.end(); // check if its an ignored column if (ignoredCount > 0) { if (isIndexIgnored(ignoredIndexes, j)) { loopIgnoredCount++; continue; } } } else { // not enough delims check if (j < (filesColumnCount - 1) && index > (currentRowLength - 1)) { outputBuffer.append("Insufficient number of column "); outputBuffer.append("values provided at line "); outputBuffer.append(lineNumber); appendProgressErrorText(outputBuffer); int yesNo = GUIUtilities .displayYesNoDialog("Insufficient number of values provided from line " + lineNumber + ".\n" + "Do you wish to continue?", "Warning"); if (yesNo == JOptionPane.YES_OPTION) { insertLine = false; break; } else { throw new InterruptedException(); } } else { // check if we're on a delim the matcher didn't pick up int delimLength = delim.length(); if (row.substring(index, index + delimLength).equals(delim)) { // increment index index++; // assign as null value value = null; } } } // check if we landed on the same index - likely null value if (index == lastIndex) { index++; } lastIndex = index; if (value != null && value.trim().length() == 0) { value = null; } try { ColumnData cd = columns.get(j - loopIgnoredCount); setValue(value, getIndexOfColumn(columns, cd) + 1, cd.getSQLType(), trimWhitespace, dateFormat); if (hasColumnNames) { boundVariables.put(cd, VARIABLE_BOUND); } } catch (ParseException e) { errorCount++; 
failed = true; outputBuffer.append("Error parsing date value - "); outputBuffer.append(value); outputBuffer.append(" - on line "); outputBuffer.append(lineNumber); outputBuffer.append(" at position "); outputBuffer.append(j); outputExceptionError(null, e); break; } catch (NumberFormatException e) { errorCount++; failed = true; outputBuffer.append("Error parsing value - "); outputBuffer.append(value); outputBuffer.append(" - on line "); outputBuffer.append(lineNumber); outputBuffer.append(" at position "); outputBuffer.append(j); outputExceptionError(null, e); break; } } if (!insertLine) { prepStmnt.clearParameters(); continue; } if (failed && haltOnError) { processResult = FAILED; break; } // execute the statement try { // check all variables are bound if we used // the column names from the first row if (hasColumnNames) { index = 0; // check all variables are bound - insert NULL otherwise for (Map.Entry<ColumnData, String> entry : boundVariables.entrySet()) { ColumnData cd = entry.getKey(); if (VARIABLE_NOT_BOUND.equals(entry.getValue())) { index = getIndexOfColumn(columns, cd); prepStmnt.setNull(index + 1, cd.getSQLType()); } } } if (isBatch) { prepStmnt.addBatch(); } else { int result = prepStmnt.executeUpdate(); tableInsertCount += result; commitCount += result; } rollbackCount++; // check the rollback segment if (rollbackCount == rollbackSize) { if (isBatch) { int result = getBatchResult(prepStmnt.executeBatch())[0]; tableInsertCount += result; commitCount += result; prepStmnt.clearBatch(); } conn.commit(); totalInsertCount += commitCount; tableCommitCount = tableInsertCount; rollbackCount = 0; commitCount = 0; } // reset bound variables if (hasColumnNames) { for (int k = 0; k < columnCount; k++) { boundVariables.put(columns.get(k), VARIABLE_NOT_BOUND); } } } catch (SQLException e) { logException(e); errorCount++; if (!isBatch) { outputBuffer.append("Error inserting data from line "); outputBuffer.append(lineNumber); outputExceptionError(null, e); } else { 
outputBuffer.append("Error on last batch execution"); outputExceptionError(null, e); } if (haltOnError) { processResult = FAILED; conn.rollback(); getParent().cancelTransfer(); throw new InterruptedException(); } } } // ---------------------------- // file/table has ended here if (isBatch) { int[] batchResult = null; try { batchResult = getBatchResult(prepStmnt.executeBatch()); int result = batchResult[0]; tableInsertCount += result; commitCount += result; tableCommitCount = tableInsertCount; } catch (BatchUpdateException e) { logException(e); int[] updateCounts = e.getUpdateCounts(); batchResult = getBatchResult(updateCounts); errorCount += batchResult[1]; if (errorCount == 0) { errorCount = 1; } outputBuffer.append("An error occured during the batch process: "); outputBuffer.append(e.getMessage()); SQLException _e = e.getNextException(); while (_e != null) { outputBuffer.append("\nNext Exception: "); outputBuffer.append(_e.getMessage()); _e = _e.getNextException(); } outputBuffer.append("\n\nRecords processed to the point "); outputBuffer.append("where this error occurred: "); outputBuffer.append(updateCounts.length); appendProgressErrorText(outputBuffer); processResult = FAILED; } // Log.debug("commitCount: " + commitCount + // " batch: " + batchResult[0]); if (tableRowCount != tableInsertCount) { conn.rollback(); if (onError == ImportExportProcess.STOP_TRANSFER) { getParent().cancelTransfer(); processResult = FAILED; throw new InterruptedException(); } } } boolean doCommit = true; if (failed && !isBatch && rollbackSize != ImportExportProcess.COMMIT_END_OF_ALL_FILES) { int yesNo = GUIUtilities.displayYesNoDialog( "The process completed with errors.\n" + "Do you wish to commit the last block?", "Confirm commit"); doCommit = (yesNo == JOptionPane.YES_OPTION); } // do the commit if ok from above // and if rollback size selected is end of file if (rollbackSize == ImportExportProcess.COMMIT_END_OF_FILE) { if (doCommit) { conn.commit(); totalInsertCount += 
commitCount; tableCommitCount = tableInsertCount; commitCount = 0; } else { conn.rollback(); } } // update the progress display printTableResult(tableRowCount, tableInsertCount, dto.getTableName()); setProgressStatus(100); // reset the checks hasDate = false; failed = false; } // commit the last remaining block or where // set to commit at the end of all files if (rollbackSize != ImportExportProcess.COMMIT_END_OF_FILE) { setProgressStatus(100); boolean doCommit = true; if (errorCount > 0 && errorCount != totalRecordCount) { int yesNo = GUIUtilities.displayYesNoDialog( "The process completed with errors.\n" + "Do you wish to commit the changes?", "Confirm commit"); doCommit = (yesNo == JOptionPane.YES_OPTION); } if (doCommit) { conn.commit(); totalInsertCount += commitCount; } else { conn.rollback(); } } processResult = SUCCESS; } catch (InterruptedException e) { if (processResult != FAILED) { processResult = CANCELLED; } try { if (prepStmnt != null) { prepStmnt.cancel(); } if (conn != null) { conn.rollback(); } } catch (SQLException e2) { outputExceptionError("Error rolling back transaction", e); } } catch (Exception e) { logException(e); outputBuffer.append("Error processing data from line "); outputBuffer.append(lineNumber); outputExceptionError("\nUnrecoverable error importing table data from file", e); int yesNo = GUIUtilities.displayYesNoDialog( "The process encountered errors.\n" + "Do you wish to commit the last transaction block?", "Confirm commit"); boolean doCommit = (yesNo == JOptionPane.YES_OPTION); try { if (doCommit) { conn.commit(); totalInsertCount += commitCount; } else { conn.rollback(); } } catch (SQLException e2) { logException(e2); outputExceptionError("Error processing last transaction block", e2); } processResult = FAILED; } finally { finish(); releaseResources(getParent().getDatabaseConnection()); if (totalRecordCount == 0 || errorCount > 0) { processResult = FAILED; } setTableCount(tableCount); setRecordCount(totalRecordCount); 
setRecordCountProcessed(totalInsertCount); setErrorCount(errorCount); setProgressStatus(100); GUIUtilities.scheduleGC(); if (reader != null) { try { reader.close(); } catch (IOException e) { } } if (fileReader != null) { try { fileReader.close(); } catch (IOException e) { } } if (prepStmnt != null) { try { prepStmnt.close(); } catch (SQLException e) { } } } return processResult; }
From source file:org.apache.hadoop.hive.ql.udf.generic.GenericUDFRegExpInstr.java
/**
 * Finds the 1-based position of a regex match inside a string.
 *
 * Arguments, in order:
 * <ol>
 *   <li>source string (required; {@code null} yields {@code null})</li>
 *   <li>regex (required; {@code null} yields {@code null})</li>
 *   <li>start position, 1-based, default 1</li>
 *   <li>occurrence to look for, default 1</li>
 *   <li>return option: 0 = position of the match start (default),
 *       1 = position just after the match</li>
 *   <li>match parameter, any of {@code i c n m x}; only the last character is used:
 *       {@code i} CASE_INSENSITIVE, {@code n} DOTALL, {@code m} MULTILINE,
 *       {@code x} COMMENTS, {@code c} no flags (default)</li>
 * </ol>
 *
 * @return an {@code IntWritable} holding the position, or 0 when an argument is out
 *         of range, the regex is invalid or the occurrence is not found; {@code null}
 *         when the source string or regex is {@code null}
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    IntWritable intWritable = new IntWritable(0);
    int position = 1;
    int occurrence = 1;
    int return_option = 0;
    String match_parameter = "c";

    if (arguments[0].get() == null || arguments[1].get() == null) {
        return null;
    }
    str = (String) converters[0].convert(arguments[0].get());
    substr = (String) converters[1].convert(arguments[1].get());
    String strFromPosition = str;

    if (arguments.length > 2) {
        position = ((IntObjectInspector) argumentOIs[2]).get(arguments[2].get());
        if (position > str.length() || position < 1) {
            LOG.warn("illegal start position");
            return intWritable;
        }
        if (arguments.length > 3) {
            occurrence = ((IntObjectInspector) argumentOIs[3]).get(arguments[3].get());
            if (occurrence > str.length() || occurrence < 0) {
                LOG.warn("illegal occurrence");
                return intWritable;
            }
        }
        if (arguments.length > 4) {
            return_option = ((IntObjectInspector) argumentOIs[4]).get(arguments[4].get());
            if ((return_option != 0) && (return_option != 1)) {
                // Previously logged "illegal occurrence", which named the wrong argument.
                LOG.warn("illegal return_option");
                return intWritable;
            }
        }
        if (arguments.length > 5) {
            String requested = ((StringObjectInspector) argumentOIs[5])
                    .getPrimitiveJavaObject(arguments[5].get());
            // A null or empty parameter used to crash (NPE / index -1); keep the default.
            if (requested != null && !requested.isEmpty()) {
                // Locale.ROOT: the default locale may lower-case 'I' to a non-ASCII letter.
                requested = requested.toLowerCase(java.util.Locale.ROOT);
                for (int i = 0; i < requested.length(); i++) {
                    if ("icnmx".indexOf(requested.charAt(i)) < 0) {
                        LOG.warn("illegal match_parameter");
                        return intWritable;
                    }
                }
                // Only the last character decides the flag.
                match_parameter = String.valueOf(requested.charAt(requested.length() - 1));
            }
        }
    }

    try {
        int flags;
        switch (match_parameter.charAt(0)) {
        case 'i':
            flags = Pattern.CASE_INSENSITIVE;
            break;
        case 'n':
            flags = Pattern.DOTALL;
            break;
        case 'm':
            flags = Pattern.MULTILINE;
            break;
        case 'x':
            flags = Pattern.COMMENTS;
            break;
        default: // 'c' and anything else: plain matching
            flags = 0;
            break;
        }
        p = Pattern.compile(substr, flags);

        if (position > 1) {
            strFromPosition = str.substring(position - 1);
        }
        Matcher m = p.matcher(strFromPosition);
        for (int i = 0; i < occurrence; i++) {
            if (!m.find()) {
                // Do not call find() again just to log it; the result is known to be false.
                LOG.info("m.find() = false");
                intWritable.set(0);
                break;
            }
            // Offsets are relative to strFromPosition; adding position yields the
            // 1-based index in the original string.
            int offset = (return_option == 0) ? m.start() : m.end();
            intWritable.set(offset + position);
        }
        return intWritable;
    } catch (PatternSyntaxException e) {
        LOG.info("PatternSyntaxException");
        return intWritable;
    }
}
From source file:org.montp2.m1decol.ter.preprocessing.GlobalPreProcessing.java
private Integer findFileByTxt(String dirPath, String txt) throws Exception { if (txt.length() > 12600) throw new IllegalArgumentException("Text very large for the command grep"); String[] command = { "grep", "-l", "-R", txt, dirPath }; Process process = new ProcessBuilder(command).start(); process.waitFor();// w w w . j ava 2 s . c o m String error = InputStreamUtils.readInputStream(process.getErrorStream()); if (error.length() > 0) throw new ProcessingException("The load all the message :" + error); String output = InputStreamUtils.readInputStream(process.getInputStream()); Pattern pattern = Pattern.compile("^( \\p{Print}+ ) (_(\\p{Digit}+).txt)$", Pattern.COMMENTS); Matcher matcher = pattern.matcher(output); if (matcher.matches()) { return Integer.parseInt(matcher.group(3)); } throw new NullPointerException("Not exist the user id"); }
From source file:org.wso2.carbon.identity.common.util.validation.ValidationUtils.java
public static boolean isValidFileName(String fileName) { String fileNameRegEx = ""; // read filename regex from identity.yaml if (isBlank(fileNameRegEx)) { fileNameRegEx = DEFAULT_FILE_NAME_REGEX; }/*from w w w . j a va 2s . c o m*/ Pattern pattern = Pattern.compile(fileNameRegEx, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.COMMENTS); Matcher matcher = pattern.matcher(fileName); return matcher.matches(); }