Example usage for java.util.regex Matcher replaceAll

List of usage examples for java.util.regex Matcher replaceAll

Introduction

On this page you can find example usage for java.util.regex Matcher replaceAll.

Prototype

public String replaceAll(String replacement)
public String replaceAll(Function<MatchResult, String> replacer)

Document

Replaces every subsequence of the input sequence that matches the pattern with the given replacement string, or, with the Function overload, with the result of applying the given replacer function to the match result corresponding to that subsequence.
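
The examples below all use the String overload. For a quick look at both forms, here is a minimal, self-contained sketch (the class name ReplaceAllDemo is ours; the Function overload requires Java 9 or later):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ReplaceAllDemo {
    public static void main(String[] args) {
        Pattern digits = Pattern.compile("\\d+");
        Matcher m = digits.matcher("order 12 of 7");

        // String overload: every match becomes the literal replacement
        System.out.println(m.replaceAll("#")); // order # of #

        // Function overload: compute each replacement from the MatchResult.
        // replaceAll resets the matcher first, so no explicit reset is needed.
        System.out.println(m.replaceAll(r -> String.valueOf(Integer.parseInt(r.group()) * 2)));
        // order 24 of 14
    }
}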

Usage

From source file:Normalization.TextNormalization.java

public String removeSpacesFromString(String content) {

    String utf8tweet = "";
    try {
        byte[] utf8Bytes = content.getBytes("UTF-8");

        utf8tweet = new String(utf8Bytes, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is always supported, so this is unreachable in practice
    }

    final String regex = "\\s{2,}";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    utf8tweet = unicodeOutlierMatcher.replaceAll(" ");
    return utf8tweet;
}

From source file:Normalization.TextNormalization.java

public String removeTwoLetterWordsFromString(String content) {

    String utf8tweet = "";
    try {
        byte[] utf8Bytes = content.getBytes("UTF-8");

        utf8tweet = new String(utf8Bytes, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is always supported, so this is unreachable in practice
    }

    final String regex = "((^|\\s)(\\w{1,2})(\\s|$))";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    utf8tweet = unicodeOutlierMatcher.replaceAll(" ");
    return utf8tweet;
}

From source file:Normalization.TextNormalization.java

public String removeNonEnglishWordsFromString(String content) {

    String utf8tweet = "";
    try {
        byte[] utf8Bytes = content.getBytes("UTF-8");

        utf8tweet = new String(utf8Bytes, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // UTF-8 is always supported, so this is unreachable in practice
    }

    final String regex = "[\\W]";
    final Pattern unicodeOutliers = Pattern.compile(regex,
            Pattern.MULTILINE | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    Matcher unicodeOutlierMatcher = unicodeOutliers.matcher(utf8tweet);
    utf8tweet = unicodeOutlierMatcher.replaceAll(" ");
    return utf8tweet;
}

From source file:org.dspace.app.statistics.LogAnalyser.java

/**
 * using the pre-configuration information passed here, analyse the logs
 * and produce the aggregation file
 *
 * @param   context     the DSpace context object this occurs under
 * @param   myLogDir    the passed log directory.  Uses default if null
 * @param   myFileTemplate  the passed file name regex.  Uses default if null
 * @param   myConfigFile    the DStat config file.  Uses default if null
 * @param   myOutFile    the file to which to output aggregation data.  Uses default if null
 * @param   myStartDate     the desired start of the analysis.  Starts from the beginning otherwise
 * @param   myEndDate       the desired end of the analysis.  Goes to the end otherwise
 * @param   myLookUp        force a lookup of the database
 * @return aggregate output
 * @throws IOException if IO error
 * @throws SQLException if database error
 * @throws SearchServiceException if search error
 */
public static String processLogs(Context context, String myLogDir, String myFileTemplate, String myConfigFile,
        String myOutFile, Date myStartDate, Date myEndDate, boolean myLookUp)
        throws IOException, SQLException, SearchServiceException {
    // FIXME: perhaps we should have all parameters and aggregators put 
    // together in a single aggregating object

    // if the timer has not yet been started, then start it
    startTime = new GregorianCalendar();

    //instantiate aggregators
    actionAggregator = new HashMap<String, Integer>();
    searchAggregator = new HashMap<String, Integer>();
    userAggregator = new HashMap<String, Integer>();
    itemAggregator = new HashMap<String, Integer>();
    archiveStats = new HashMap<String, Integer>();

    //instantiate lists
    generalSummary = new ArrayList<String>();
    excludeWords = new ArrayList<String>();
    excludeTypes = new ArrayList<String>();
    excludeChars = new ArrayList<String>();
    itemTypes = new ArrayList<String>();

    // set the parameters for this analysis
    setParameters(myLogDir, myFileTemplate, myConfigFile, myOutFile, myStartDate, myEndDate, myLookUp);

    // prepare our standard file readers and buffered readers
    FileReader fr = null;
    BufferedReader br = null;

    // read in the config information, throwing an error if we fail to open
    // the given config file
    readConfig(configFile);

    // assemble the regular expressions for later use (requires the file
    // template to build the regex to match against)
    setRegex(fileTemplate);

    // get the log files
    File[] logFiles = getLogFiles(logDir);

    // standard loop counter
    int i = 0;

    // for every log file do analysis
    // FIXME: it is easy to implement not processing log files after the
    // dates exceed the end boundary, but is there an easy way to do it
    // for the start of the file?  Note that we can assume that the contents
    // of the log file are sequential, but can we assume the files are
    // provided in a date sequence?
    for (i = 0; i < logFiles.length; i++) {
        // check to see if this file is a log file against the global regex
        Matcher matchRegex = logRegex.matcher(logFiles[i].getName());
        if (matchRegex.matches()) {
            // if it is a log file, open it up and let's have a look at the
            // contents.
            try {
                fr = new FileReader(logFiles[i].toString());
                br = new BufferedReader(fr);
            } catch (IOException e) {
                System.out.println("Failed to read log file " + logFiles[i].toString());
                System.exit(0);
            }

            // for each line in the file do the analysis
            // FIXME: perhaps each section needs to be doled out to an
            // analysing class to allow pluggability of other methods of
            // analysis, and ease of code reading too - Pending further thought
            String line = null;
            while ((line = br.readLine()) != null) {
                // get the log line object
                LogLine logLine = getLogLine(line);

                // if there are line segments get on with the analysis
                if (logLine != null) {
                    // first find out if we are constraining by date and 
                    // if so apply the restrictions
                    if ((startDate != null) && (!logLine.afterDate(startDate))) {
                        continue;
                    }

                    if ((endDate != null) && (!logLine.beforeDate(endDate))) {
                        break;
                    }

                    // count the number of lines parsed
                    lineCount++;

                    // if we are not constrained by date, register the date
                    // as the start/end date if it is the earliest/latest so far
                    // FIXME: this should probably have a method of its own
                    if (startDate == null) {
                        if (logStartDate != null) {
                            if (logLine.beforeDate(logStartDate)) {
                                logStartDate = logLine.getDate();
                            }
                        } else {
                            logStartDate = logLine.getDate();
                        }
                    }

                    if (endDate == null) {
                        if (logEndDate != null) {
                            if (logLine.afterDate(logEndDate)) {
                                logEndDate = logLine.getDate();
                            }
                        } else {
                            logEndDate = logLine.getDate();
                        }
                    }

                    // count the warnings
                    if (logLine.isLevel("WARN")) {
                        // FIXME: really, this ought to be some kind of level
                        // aggregator
                        warnCount++;
                    }
                    // count the exceptions
                    if (logLine.isLevel("ERROR")) {
                        excCount++;
                    }

                    if (null == logLine.getAction()) {
                        continue;
                    }

                    // is the action a search?
                    if (logLine.isAction("search")) {
                        // get back all the valid search words from the query
                        String[] words = analyseQuery(logLine.getParams());

                        // for each search word add to the aggregator or
                        // increment the aggregator's counter
                        for (int j = 0; j < words.length; j++) {
                            // FIXME: perhaps aggregators ought to be objects
                            // themselves
                            searchAggregator.put(words[j], increment(searchAggregator, words[j]));
                        }
                    }

                    // is the action a login, and are we counting user logins?
                    if (logLine.isAction("login") && !userEmail.equals("off")) {
                        userAggregator.put(logLine.getUser(), increment(userAggregator, logLine.getUser()));
                    }

                    // is the action an item view?
                    if (logLine.isAction("view_item")) {
                        String handle = logLine.getParams();

                        // strip the handle string
                        Matcher matchHandle = handleRX.matcher(handle);
                        handle = matchHandle.replaceAll("");

                        // strip the item id string
                        Matcher matchItem = itemRX.matcher(handle);
                        handle = matchItem.replaceAll("").trim();

                        // either add the handle to the aggregator or
                        // increment its counter
                        itemAggregator.put(handle, increment(itemAggregator, handle));
                    }

                    // log all the activity
                    actionAggregator.put(logLine.getAction(), increment(actionAggregator, logLine.getAction()));
                }
            }

            // close the file reading buffers
            br.close();
            fr.close();

        }
    }

    // do we want to do a database lookup?  Do so only if the start and
    // end dates are null or lookUp is true
    // FIXME: this is a kind of separate section.  Would it be worth building
    // the summary string separately and then inserting it into the real
    // summary later?  Especially if we make the archive analysis more complex
    archiveStats.put("All Items", getNumItems(context));
    for (i = 0; i < itemTypes.size(); i++) {
        archiveStats.put(itemTypes.get(i), getNumItems(context, itemTypes.get(i)));
    }

    // now do the host name and url lookup
    hostName = ConfigurationManager.getProperty("dspace.hostname").trim();
    name = ConfigurationManager.getProperty("dspace.name").trim();
    url = ConfigurationManager.getProperty("dspace.url").trim();
    if ((url != null) && (!url.endsWith("/"))) {
        url = url + "/";
    }

    // do the average views analysis
    if ((archiveStats.get("All Items")).intValue() != 0) {
        // FIXME: this is dependent on there being a query on the db, which
        // there might not always be if it becomes configurable
        Double avg = Math.ceil((actionAggregator.get("view_item")).doubleValue()
                / (archiveStats.get("All Items")).doubleValue());
        views = avg.intValue();
    }

    // finally, write the output
    return createOutput();
}

From source file:edu.harvard.i2b2.pm.ws.PMService.java

public OMElement getVersion(OMElement getPMDataElement) throws I2B2Exception, JAXBUtilException {

    Pattern p = Pattern.compile("<password>.+</password>");
    Matcher m = p.matcher(getPMDataElement.toString());
    String outString = m.replaceAll("<password>*********</password>");

    p = Pattern.compile(">.+</ns9:set_password>");
    m = p.matcher(outString);
    outString = m.replaceAll(">*********</ns9:set_password>");
    log.debug("Received Request PM Element " + outString);

    OMElement returnElement = null;

    // Note: a null request would already have thrown a NullPointerException
    // in the masking step above, so this check should really come first
    if (getPMDataElement == null) {
        log.error("Incoming Version request is null");
        throw new I2B2Exception("Incoming Version request is null");
    }

    VersionMessage servicesMsg = new VersionMessage(getPMDataElement.toString());

    String version = servicesMsg.getRequestMessageType().getMessageBody().getGetMessageVersion().toString();
    if (version.equals("")) {
        edu.harvard.i2b2.pm.datavo.i2b2versionmessage.ResponseMessageType pmDataResponse = new edu.harvard.i2b2.pm.datavo.i2b2versionmessage.ResponseMessageType();

        edu.harvard.i2b2.pm.datavo.i2b2versionmessage.ResponseMessageType.MessageBody mb = new edu.harvard.i2b2.pm.datavo.i2b2versionmessage.ResponseMessageType.MessageBody();
        mb.setI2B2MessageVersion(msgVersion);
        pmDataResponse.setMessageBody(mb);

        String xmlMsg = MessageFactory.convertToXMLString(pmDataResponse);

        try {
            returnElement = MessageFactory.createResponseOMElementFromString(xmlMsg);
            log.debug("my pm repsonse is: " + pmDataResponse);
            log.debug("my return is: " + returnElement);
        } catch (XMLStreamException e) {
            log.error("Error creating OMElement from response string " + pmDataResponse, e);
        }

    }

    return returnElement;

}
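
A caveat on the masking pattern above: the greedy .+ in <password>.+</password> spans from the first opening tag to the last closing tag, so two password elements in one message would be collapsed into a single mask. A reluctant quantifier keeps each match inside one element. A sketch of ours, not from the original source:

import java.util.regex.Pattern;

public class MaskPasswords {
    public static void main(String[] args) {
        String xml = "<a><password>p1</password><b/><password>p2</password></a>";

        // Greedy: one match swallows everything between the outermost tags
        System.out.println(Pattern.compile("<password>.+</password>")
                .matcher(xml).replaceAll("<password>*********</password>"));
        // <a><password>*********</password></a>

        // Reluctant: each password element is masked separately
        System.out.println(Pattern.compile("<password>.+?</password>")
                .matcher(xml).replaceAll("<password>*********</password>"));
        // <a><password>*********</password><b/><password>*********</password></a>
    }
}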

From source file:com.khs.sherpa.processor.RestfulRequestProcessor.java

public String getAction(HttpServletRequest request) {
    final Pattern pattern = Pattern.compile("\\{\\d?\\w+\\}");

    Matcher matcher = null;
    if (method.isAnnotationPresent(Action.class)) {
        for (String url : method.getAnnotation(Action.class).mapping()) {
            matcher = pattern.matcher(url);
            if (Pattern.matches(matcher.replaceAll("[^/]*"), UrlUtil.getPath(request))) {
                path = url;
            }
        }
    }
    return MethodUtil.getMethodName(method);
}
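
The example above uses replaceAll to turn a URL template into a regular expression: each {name} placeholder is replaced by [^/]*, which matches a single path segment, and the resulting string is matched against the request path. A stripped-down sketch of the same trick (class name and template are ours):

import java.util.regex.Pattern;

public class TemplateMatch {
    public static void main(String[] args) {
        // Matches placeholders such as {id} or {postId}
        Pattern placeholder = Pattern.compile("\\{\\d?\\w+\\}");
        String template = "/users/{id}/posts/{postId}";

        // Replace every placeholder with a regex for one path segment
        String asRegex = placeholder.matcher(template).replaceAll("[^/]*");

        System.out.println(Pattern.matches(asRegex, "/users/42/posts/7")); // true
        System.out.println(Pattern.matches(asRegex, "/users/42"));         // false
    }
}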

From source file:org.lanes.text.mining.Conceptualiser.java

public String mapTermToConcept(String term) {
    long timestart = System.currentTimeMillis();

    String mappedconcept = "";
    try {
        List<String> parents = new ArrayList<String>();
        if (mappedconcept.equals("")) {
            //System.err.println("matchExact");

            mappedconcept = matchExact(term);
            if (!mappedconcept.equals("")) {
                parents = findNeighbours(mappedconcept, "HYPERNYMY");
            }

            //System.err.println("mappedconcept: " + mappedconcept + ", parents.size: " + parents.size() + "");
        }
        if ((mappedconcept.equals("")) || (!mappedconcept.equals("") && parents.size() == 0)) {
            //System.err.println("matchSynonym");

            if (mappedconcept.equals("")) {
                mappedconcept = matchSynonym(term);
            } else {
                mappedconcept = matchSynonym(mappedconcept);
            }

            if (!mappedconcept.equals("")) {

                parents = findNeighbours(mappedconcept, "HYPERNYMY");

            }
        }
        //System.err.println("mappedconcept: " + mappedconcept + ", parents.size: " + parents.size() + "");

        if ((!mappedconcept.equals("") && parents.size() == 0)) {
            //System.err.println("resolvePolysemy");

            mappedconcept = resolvePolysemy(mappedconcept, term, findNeighbours(mappedconcept, "POLYSEMY"));

            //System.err.println("mappedconcept: " + mappedconcept + ", parents.size: " + parents.size() + "");
        }
    } catch (Exception e) {
        // fall through and return whatever concept has been mapped so far
    }

    Matcher replace = Pattern.compile("\\s").matcher(mappedconcept);
    mappedconcept = replace.replaceAll("_");

    return mappedconcept;
}

From source file:at.ac.tuwien.inso.subcat.utility.commentparser.Parser.java

private void parseParagraph(List<ContentNode<T>> ast, String para, int paragraphSeparatorSize) {
    if (paragraphIsArtefact(para)) {
        ast.add(new ArtefactNode<T>(para));
        return;
    }

    Matcher normM = pNorm.matcher(para);
    String paraOut = normM.replaceAll(" ");

    if (paraOut.length() != 0) {
        ast.add(new ParagraphNode<T>(paraOut, para, paragraphSeparatorSize));
    }
}

From source file:kml.feature.Feature.java

public String stripHtml(String string) {
    if (string == null || string.length() == 0) {
        return string;
    }
    Matcher m = REMOVE_TAGS.matcher(string);
    return m.replaceAll("");
}
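
REMOVE_TAGS is a static Pattern defined elsewhere in that class. A self-contained version of the same idiom, with a typical tag-stripping pattern (assumed, not necessarily the original):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class StripHtml {
    // Reluctant quantifier so each tag is removed individually (assumed pattern)
    private static final Pattern REMOVE_TAGS = Pattern.compile("<.+?>");

    public static String stripHtml(String string) {
        if (string == null || string.isEmpty()) {
            return string;
        }
        Matcher m = REMOVE_TAGS.matcher(string);
        return m.replaceAll("");
    }

    public static void main(String[] args) {
        System.out.println(stripHtml("<p>Hello <b>world</b></p>")); // Hello world
    }
}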

From source file:edu.lternet.pasta.datapackagemanager.DataPackageManagerResourceTest.java

public static void modifyTestEmlFile(String testScope, File testEmlFile, String newPackageId) {
    String xmlString = FileUtility.fileToString(testEmlFile);
    Pattern pattern = Pattern.compile(testScope + "\\.\\d+\\.\\d+");
    Matcher matcher = pattern.matcher(xmlString);
    // Replace packageId value with new packageId value
    String modifiedXmlString = matcher.replaceAll(newPackageId);
    FileWriter fileWriter;

    try {
        fileWriter = new FileWriter(testEmlFile);
        StringBuffer stringBuffer = new StringBuffer(modifiedXmlString);
        IOUtil.writeToWriter(stringBuffer, fileWriter, true);
    } catch (IOException e) {
        fail("IOException modifying packageId in test EML file: " + e.getMessage());
    }
}