List of usage examples for java.util.regex Matcher replaceAll
public String replaceAll(String replacement)
From source file:Normalization.TextNormalization.java
/**
 * Collapses every run of two or more whitespace characters in {@code content}
 * into a single space and returns the result. Single spaces are left alone.
 *
 * @param content the text to normalise; must not be null
 * @return the text with whitespace runs collapsed to single spaces
 */
public String removeSpacesFromString(String content) {
    // The original routed the text through a UTF-8 byte round-trip guarded by
    // an empty catch for UnsupportedEncodingException — an exception that can
    // never be thrown for UTF-8. Using the StandardCharsets constant keeps the
    // round-trip (unpaired surrogates become '?', as before) while removing
    // the impossible checked exception and the silent swallow.
    byte[] utf8Bytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    String utf8tweet = new String(utf8Bytes, java.nio.charset.StandardCharsets.UTF_8);
    // MULTILINE lets the run cross line boundaries; CASE_INSENSITIVE and
    // UNICODE_CASE are no-ops for \s but are kept for behavioural parity.
    final String regex = "\\s{2,}";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    return unicodeOutlierMatcher.replaceAll(" ");
}
From source file:Normalization.TextNormalization.java
/**
 * Replaces words of one or two word-characters (bounded by whitespace or the
 * string/line edges) with a single space and returns the result.
 *
 * Note: because each match consumes its surrounding whitespace, consecutive
 * short words are not all removed in one pass — this matches the original
 * behaviour and is deliberately preserved.
 *
 * @param content the text to filter; must not be null
 * @return the text with isolated one/two-letter words blanked out
 */
public String removeTwoLetterWordsFromString(String content) {
    // UTF-8 round-trip kept from the original; StandardCharsets removes the
    // impossible UnsupportedEncodingException and its empty catch block.
    byte[] utf8Bytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    String utf8tweet = new String(utf8Bytes, java.nio.charset.StandardCharsets.UTF_8);
    final String regex = "((^|\\s)(\\w{1,2})(\\s|$))";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    return unicodeOutlierMatcher.replaceAll(" ");
}
From source file:Normalization.TextNormalization.java
/**
 * Replaces every non-word character with a single space and returns the
 * result.
 *
 * Note: without UNICODE_CHARACTER_CLASS, {@code \W} is ASCII-based here, so
 * accented letters are also replaced — preserved from the original.
 *
 * @param content the text to filter; must not be null
 * @return the text with each non-word character replaced by a space
 */
public String removeNonEnglishWordsFromString(String content) {
    // UTF-8 round-trip kept from the original; StandardCharsets removes the
    // impossible UnsupportedEncodingException and its empty catch block.
    byte[] utf8Bytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    String utf8tweet = new String(utf8Bytes, java.nio.charset.StandardCharsets.UTF_8);
    final String regex = "[\\W]";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    return unicodeOutlierMatcher.replaceAll(" ");
}
From source file:org.dspace.app.statistics.LogAnalyser.java
/**
 * Using the pre-configuration information passed here, analyse the logs
 * and produce the aggregation file.
 *
 * @param context        the DSpace context object this occurs under
 * @param myLogDir       the passed log directory. Uses default if null
 * @param myFileTemplate the passed file name regex. Uses default if null
 * @param myConfigFile   the DStat config file. Uses default if null
 * @param myOutFile      the file to which to output aggregation data. Uses default if null
 * @param myStartDate    the desired start of the analysis. Starts from the beginning otherwise
 * @param myEndDate      the desired end of the analysis. Goes to the end otherwise
 * @param myLookUp       force a lookup of the database
 * @return aggregate output
 * @throws IOException if IO error
 * @throws SQLException if database error
 * @throws SearchServiceException if search error
 */
public static String processLogs(Context context, String myLogDir, String myFileTemplate, String myConfigFile,
        String myOutFile, Date myStartDate, Date myEndDate, boolean myLookUp)
        throws IOException, SQLException, SearchServiceException {
    // FIXME: perhaps we should have all parameters and aggregators put
    // together in a single aggregating object

    // if the timer has not yet been started, then start it
    startTime = new GregorianCalendar();

    // instantiate aggregators — note these are (re)assigned static fields,
    // so each invocation resets the whole analysis state
    actionAggregator = new HashMap<String, Integer>();
    searchAggregator = new HashMap<String, Integer>();
    userAggregator = new HashMap<String, Integer>();
    itemAggregator = new HashMap<String, Integer>();
    archiveStats = new HashMap<String, Integer>();

    // instantiate lists (also static state, populated by readConfig below)
    generalSummary = new ArrayList<String>();
    excludeWords = new ArrayList<String>();
    excludeTypes = new ArrayList<String>();
    excludeChars = new ArrayList<String>();
    itemTypes = new ArrayList<String>();

    // set the parameters for this analysis
    setParameters(myLogDir, myFileTemplate, myConfigFile, myOutFile, myStartDate, myEndDate, myLookUp);

    // pre prepare our standard file readers and buffered readers
    FileReader fr = null;
    BufferedReader br = null;

    // read in the config information, throwing an error if we fail to open
    // the given config file
    readConfig(configFile);

    // assemble the regular expressions for later use (requires the file
    // template to build the regex to match it
    setRegex(fileTemplate);

    // get the log files
    File[] logFiles = getLogFiles(logDir);

    // standard loop counter
    int i = 0;

    // for every log file do analysis
    // FIXME: it is easy to implement not processing log files after the
    // dates exceed the end boundary, but is there an easy way to do it
    // for the start of the file?  Note that we can assume that the contents
    // of the log file are sequential, but can we assume the files are
    // provided in a data sequence?
    for (i = 0; i < logFiles.length; i++) {
        // check to see if this file is a log file agains the global regex
        Matcher matchRegex = logRegex.matcher(logFiles[i].getName());
        if (matchRegex.matches()) {
            // if it is a log file, open it up and lets have a look at the
            // contents.
            // NOTE(review): a failure here calls System.exit(0), terminating
            // the whole JVM rather than reporting an error to the caller —
            // preserved as-is, but worth confirming this is intended.
            try {
                fr = new FileReader(logFiles[i].toString());
                br = new BufferedReader(fr);
            } catch (IOException e) {
                System.out.println("Failed to read log file " + logFiles[i].toString());
                System.exit(0);
            }

            // for each line in the file do the analysis
            // FIXME: perhaps each section needs to be dolled out to an
            // analysing class to allow pluggability of other methods of
            // analysis, and ease of code reading too - Pending further thought
            String line = null;
            while ((line = br.readLine()) != null) {
                // get the log line object
                LogLine logLine = getLogLine(line);

                // if there are line segments get on with the analysis
                if (logLine != null) {
                    // first find out if we are constraining by date and
                    // if so apply the restrictions
                    if ((startDate != null) && (!logLine.afterDate(startDate))) {
                        continue;
                    }

                    // lines are assumed date-ordered, so once we pass the end
                    // date we can stop reading this file entirely
                    if ((endDate != null) && (!logLine.beforeDate(endDate))) {
                        break;
                    }

                    // count the number of lines parsed
                    lineCount++;

                    // if we are not constrained by date, register the date
                    // as the start/end date if it is the earliest/latest so far
                    // FIXME: this should probably have a method of its own
                    if (startDate == null) {
                        if (logStartDate != null) {
                            if (logLine.beforeDate(logStartDate)) {
                                logStartDate = logLine.getDate();
                            }
                        } else {
                            logStartDate = logLine.getDate();
                        }
                    }

                    if (endDate == null) {
                        if (logEndDate != null) {
                            if (logLine.afterDate(logEndDate)) {
                                logEndDate = logLine.getDate();
                            }
                        } else {
                            logEndDate = logLine.getDate();
                        }
                    }

                    // count the warnings
                    if (logLine.isLevel("WARN")) {
                        // FIXME: really, this ought to be some kind of level
                        // aggregator
                        warnCount++;
                    }

                    // count the exceptions
                    if (logLine.isLevel("ERROR")) {
                        excCount++;
                    }

                    // lines without an action cannot be aggregated below
                    if (null == logLine.getAction()) {
                        continue;
                    }

                    // is the action a search?
                    if (logLine.isAction("search")) {
                        // get back all the valid search words from the query
                        String[] words = analyseQuery(logLine.getParams());

                        // for each search word add to the aggregator or
                        // increment the aggregator's counter
                        for (int j = 0; j < words.length; j++) {
                            // FIXME: perhaps aggregators ought to be objects
                            // themselves
                            searchAggregator.put(words[j], increment(searchAggregator, words[j]));
                        }
                    }

                    // is the action a login, and are we counting user logins?
                    if (logLine.isAction("login") && !userEmail.equals("off")) {
                        userAggregator.put(logLine.getUser(), increment(userAggregator, logLine.getUser()));
                    }

                    // is the action an item view?
                    if (logLine.isAction("view_item")) {
                        String handle = logLine.getParams();

                        // strip the handle string
                        Matcher matchHandle = handleRX.matcher(handle);
                        handle = matchHandle.replaceAll("");

                        // strip the item id string
                        Matcher matchItem = itemRX.matcher(handle);
                        handle = matchItem.replaceAll("").trim();

                        // either add the handle to the aggregator or
                        // increment its counter
                        itemAggregator.put(handle, increment(itemAggregator, handle));
                    }

                    // log all the activity
                    actionAggregator.put(logLine.getAction(), increment(actionAggregator, logLine.getAction()));
                }
            }

            // close the file reading buffers
            br.close();
            fr.close();
        }
    }

    // do we want to do a database lookup?  Do so only if the start and
    // end dates are null or lookUp is true
    // FIXME: this is a kind of separate section.  Would it be worth building
    // the summary string separately and then inserting it into the real
    // summary later?  Especially if we make the archive analysis more complex
    archiveStats.put("All Items", getNumItems(context));
    for (i = 0; i < itemTypes.size(); i++) {
        archiveStats.put(itemTypes.get(i), getNumItems(context, itemTypes.get(i)));
    }

    // now do the host name and url lookup
    hostName = ConfigurationManager.getProperty("dspace.hostname").trim();
    name = ConfigurationManager.getProperty("dspace.name").trim();
    url = ConfigurationManager.getProperty("dspace.url").trim();
    if ((url != null) && (!url.endsWith("/"))) {
        url = url + "/";
    }

    // do the average views analysis (ceiling of item views per item)
    if ((archiveStats.get("All Items")).intValue() != 0) {
        // FIXME: this is dependent on their being a query on the db, which
        // there might not always be if it becomes configurable
        Double avg = Math.ceil(
                (actionAggregator.get("view_item")).doubleValue() / (archiveStats.get("All Items")).doubleValue());
        views = avg.intValue();
    }

    // finally, write the output
    return createOutput();
}
From source file:edu.harvard.i2b2.pm.ws.PMService.java
public OMElement getVersion(OMElement getPMDataElement) throws I2B2Exception, JAXBUtilException { Pattern p = Pattern.compile("<password>.+</password>"); Matcher m = p.matcher(getPMDataElement.toString()); String outString = m.replaceAll("<password>*********</password>"); p = Pattern.compile(">.+</ns9:set_password>"); m = p.matcher(outString);//w w w . j a v a 2s .c om outString = m.replaceAll(">*********</ns9:set_password>"); log.debug("Received Request PM Element " + outString); OMElement returnElement = null; if (getPMDataElement == null) { log.error("Incoming Version request is null"); throw new I2B2Exception("Incoming Version request is null"); } VersionMessage servicesMsg = new VersionMessage(getPMDataElement.toString()); String version = servicesMsg.getRequestMessageType().getMessageBody().getGetMessageVersion().toString(); if (version.equals("")) { edu.harvard.i2b2.pm.datavo.i2b2versionmessage.ResponseMessageType pmDataResponse = new edu.harvard.i2b2.pm.datavo.i2b2versionmessage.ResponseMessageType(); edu.harvard.i2b2.pm.datavo.i2b2versionmessage.ResponseMessageType.MessageBody mb = new edu.harvard.i2b2.pm.datavo.i2b2versionmessage.ResponseMessageType.MessageBody(); mb.setI2B2MessageVersion(msgVersion); pmDataResponse.setMessageBody(mb); String xmlMsg = MessageFactory.convertToXMLString(pmDataResponse); try { returnElement = MessageFactory.createResponseOMElementFromString(xmlMsg); log.debug("my pm repsonse is: " + pmDataResponse); log.debug("my return is: " + returnElement); } catch (XMLStreamException e) { log.error("Error creating OMElement from response string " + pmDataResponse, e); } } return returnElement; }
From source file:com.khs.sherpa.processor.RestfulRequestProcessor.java
/**
 * Resolves the action name for the current request, recording (in the
 * {@code path} field) the last @Action mapping whose URI template matches
 * the request path.
 *
 * @param request the current HTTP request
 * @return the method name used as the action identifier
 */
public String getAction(HttpServletRequest request) {
    // Matches URI template placeholders such as {id} or {2name}.
    final Pattern placeholder = Pattern.compile("\\{\\d?\\w+\\}");
    if (method.isAnnotationPresent(Action.class)) {
        for (String mapping : method.getAnnotation(Action.class).mapping()) {
            // Turn the template into a concrete regex by substituting each
            // placeholder with "[^/]*" (any run of non-slash characters),
            // then test it against the request path.
            String candidate = placeholder.matcher(mapping).replaceAll("[^/]*");
            if (Pattern.matches(candidate, UrlUtil.getPath(request))) {
                path = mapping;
            }
        }
    }
    return MethodUtil.getMethodName(method);
}
From source file:org.lanes.text.mining.Conceptualiser.java
public String mapTermToConcept(String term) { long timestart = System.currentTimeMillis(); String mappedconcept = ""; try {/*from w ww . j av a 2s.co m*/ List<String> parents = new ArrayList<String>(); if (mappedconcept.equals("")) { //System.err.println("matchExact"); mappedconcept = matchExact(term); if (!mappedconcept.equals("")) { parents = findNeighbours(mappedconcept, "HYPERNYMY"); } //System.err.println("mappedconcept: " + mappedconcept + ", parents.size: " + parents.size() + ""); } if ((mappedconcept.equals("")) || (!mappedconcept.equals("") && parents.size() == 0)) { //System.err.println("matchSynonym"); if (mappedconcept.equals("")) { mappedconcept = matchSynonym(term); } else { mappedconcept = matchSynonym(mappedconcept); } if (!mappedconcept.equals("")) { parents = findNeighbours(mappedconcept, "HYPERNYMY"); } } //System.err.println("mappedconcept: " + mappedconcept + ", parents.size: " + parents.size() + ""); if ((!mappedconcept.equals("") && parents.size() == 0)) { //System.err.println("resolvePolysemy"); mappedconcept = resolvePolysemy(mappedconcept, term, findNeighbours(mappedconcept, "POLYSEMY")); //System.err.println("mappedconcept: " + mappedconcept + ", parents.size: " + parents.size() + ""); } } catch (Exception e) { } Matcher replace = Pattern.compile("\\s").matcher(mappedconcept); mappedconcept = replace.replaceAll("_"); return mappedconcept; }
From source file:at.ac.tuwien.inso.subcat.utility.commentparser.Parser.java
License: not specified in the source listing
/**
 * Parses one paragraph into the AST: artefact paragraphs are stored verbatim,
 * other paragraphs are whitespace-normalised and appended unless they
 * normalise to the empty string.
 *
 * @param ast                    the node list to append to
 * @param para                   the raw paragraph text
 * @param paragraphSeparatorSize size of the separator preceding the paragraph
 */
private void parseParagraph(List<ContentNode<T>> ast, String para, int paragraphSeparatorSize) {
    // Artefacts (e.g. quoted code or traces) bypass normalisation entirely.
    if (paragraphIsArtefact(para)) {
        ast.add(new ArtefactNode<T>(para));
        return;
    }

    // Collapse whitespace via the precompiled pNorm pattern; an empty
    // normalised paragraph contributes no node.
    final String normalised = pNorm.matcher(para).replaceAll(" ");
    if (normalised.length() != 0) {
        ast.add(new ParagraphNode<T>(normalised, para, paragraphSeparatorSize));
    }
}
From source file:kml.feature.Feature.java
/**
 * Strips markup from the given string using the precompiled REMOVE_TAGS
 * pattern.
 *
 * @param string the text to clean; may be null
 * @return the input with all REMOVE_TAGS matches deleted; null or empty
 *         input is returned unchanged
 */
public String stripHtml(String string) {
    // Nothing to strip from null or empty input.
    if (string == null || string.isEmpty()) {
        return string;
    }
    return REMOVE_TAGS.matcher(string).replaceAll("");
}
From source file:edu.lternet.pasta.datapackagemanager.DataPackageManagerResourceTest.java
public static void modifyTestEmlFile(String testScope, File testEmlFile, String newPackageId) { String xmlString = FileUtility.fileToString(testEmlFile); Pattern pattern = Pattern.compile(testScope + "\\.\\d+\\.\\d+"); Matcher matcher = pattern.matcher(xmlString); // Replace packageId value with new packageId value String modifiedXmlString = matcher.replaceAll(newPackageId); FileWriter fileWriter;//from ww w. j a v a2 s . c o m try { fileWriter = new FileWriter(testEmlFile); StringBuffer stringBuffer = new StringBuffer(modifiedXmlString); IOUtil.writeToWriter(stringBuffer, fileWriter, true); } catch (IOException e) { fail("IOException modifying packageId in test EML file: " + e.getMessage()); } }