Usage examples for java.util.regex.Matcher.end
public int end(String name)
From source file:fr.eurecom.nerd.core.proxy.WikimetaClient.java
public List<TEntity> parse(String json, String text, OntologyType otype) throws IOException { List<TEntity> result = new LinkedList<TEntity>(); Map<String, Integer> map = new HashMap<String, Integer>(); try {/* ww w .ja v a 2 s. c o m*/ JSONObject o = new JSONObject(json); JSONArray jadocument = o.getJSONArray("document"); // 3 items is Named Entities JSONObject jodocument = jadocument.getJSONObject(2); JSONArray jsonarray = jodocument.getJSONArray("Named Entities"); for (int i = 0; i < jsonarray.length(); i++) { JSONObject jo = jsonarray.getJSONObject(i); String entity = jo.getString("EN"); String type = (jo.getString("type").equals("")) ? null : jo.getString("type"); String nerdType = OntoFactory.mapper.getNerdType(otype, entity, SOURCE, type).toString(); String uri = jo.getString("URI"); //logic to compute the startchar and endchar of the entity within the text Integer startchar = null, endchar = null; if (map.containsKey(entity)) { int value = map.get(entity); map.remove(entity); map.put(entity, new Integer(value + 1)); } else map.put(entity, new Integer(1)); try { Pattern p = Pattern.compile("\\b" + entity + "\\b"); Matcher m = p.matcher(text); for (int j = 0; j < map.get(entity) && m.find(); j++) { startchar = m.start(0); endchar = m.end(0); if (containsAtIndex(result, startchar, endchar)) j--; } double confidence = 0.0; if (!jo.getString("confidenceScore").equals("")) confidence = Double.parseDouble(jo.getString("confidenceScore")); if (startchar != null && endchar != null) { TEntity extraction = new TEntity(entity, type, uri, nerdType.toString(), startchar, endchar, confidence, SOURCE); result.add(extraction); } } catch (PatternSyntaxException eregex) { eregex.printStackTrace(); } } } catch (JSONException e) { e.printStackTrace(); } return result; }
From source file:com.norconex.commons.lang.io.TextReader.java
/** * Reads the next chunk of text, up to the maximum read size specified. * It tries as much as possible to break long text into paragraph, * sentences or words, before returning. See class documentation. * @return text read// ww w. ja va 2s . c om * @throws IOException problem reading text. */ public String readText() throws IOException { char[] text = new char[maxReadSize - buffer.length()]; int num = reader.read(text); if (num == -1) { return null; } buffer.append(String.valueOf(text, 0, num)); // Return all if we reached the end. reader.mark(1); if (reader.read() == -1) { String t = buffer.toString(); buffer.setLength(0); reader.reset(); return t; } else { reader.reset(); } Matcher m = null; // Try breaking at paragraph: m = paragraphDelimiterPattern.matcher(buffer); if (m.find()) { int mStart = m.start(m.groupCount()); int mEnd = m.end(m.groupCount()); int substringEnd = mEnd; if (removeTrailingDelimiter) { substringEnd = mStart; } String t = buffer.substring(0, substringEnd); buffer.delete(0, substringEnd); return t; } // Try breaking at sentence: m = sentencePattern.matcher(buffer); if (m.find()) { int mStart = m.start(1); int mEnd = m.end(1); int substringEnd = mEnd; if (removeTrailingDelimiter) { substringEnd = mStart; } String t = buffer.substring(0, substringEnd); buffer.delete(0, substringEnd); return t; } // Try breaking at word: m = wordDelimiterPattern.matcher(buffer); if (m.find()) { int mStart = m.start(m.groupCount()); int mEnd = m.end(m.groupCount()); int substringEnd = mEnd; if (removeTrailingDelimiter) { substringEnd = mStart; } String t = buffer.substring(0, substringEnd); buffer.delete(0, substringEnd); return t; } String t = buffer.toString(); buffer.setLength(0); return t; }
From source file:fr.smile.liferay.LiferayUrlRewriter.java
/** * Fix all resources urls and return the result. * * @param input The original charSequence to be processed. * @param requestUrl The request URL.//from w w w .j a v a2s. c o m * @param baseUrlParam The base URL selected for this request. * @return the result of this renderer. */ public CharSequence rewriteHtml(CharSequence input, String requestUrl, Pattern pattern, String baseUrlParam, String visibleBaseUrl) { if (LOG.isDebugEnabled()) { LOG.debug("input=" + input); LOG.debug("rewriteHtml (requestUrl=" + requestUrl + ", pattern=" + pattern + ",baseUrlParam)" + baseUrlParam + ",strVisibleBaseUrl=" + visibleBaseUrl + ")"); } StringBuffer result = new StringBuffer(input.length()); Matcher m = pattern.matcher(input); while (m.find()) { if (LOG.isTraceEnabled()) { LOG.trace("found match: " + m); } String url = input.subSequence(m.start(3) + 1, m.end(3) - 1).toString(); url = rewriteUrl(url, requestUrl, baseUrlParam, visibleBaseUrl); url = url.replaceAll("\\$", "\\\\\\$"); // replace '$' -> '\$' as it // denotes group StringBuffer tagReplacement = new StringBuffer("<$1$2=\"").append(url).append("\""); if (m.groupCount() > 3) { tagReplacement.append("$4"); } tagReplacement.append('>'); if (LOG.isTraceEnabled()) { LOG.trace("replacement: " + tagReplacement); } m.appendReplacement(result, tagReplacement.toString()); } m.appendTail(result); return result; }
From source file:de.tudarmstadt.ukp.dkpro.core.textnormalizer.ReplacementFileNormalizer.java
/**
 * Builds the replacement annotations for every occurrence of a replacement
 * key in the lower-cased document text.
 *
 * <p>Each key of {@code replacementMap} is lower-cased, quoted and wrapped
 * between {@code srcSurroundingsStart} and {@code srcSurroundingsEnd}. Every
 * match yields one {@link SofaChangeAnnotation} covering the key itself
 * and carrying the replacement value wrapped in {@code targetSurroundings}.
 *
 * @param jcas the CAS whose document text is scanned
 * @return a map holding all changes in a single list under key {@code 1}
 */
@Override
protected Map<Integer, List<SofaChangeAnnotation>> createSofaChangesMap(JCas jcas) {
    Map<Integer, List<SofaChangeAnnotation>> changesMap = new TreeMap<Integer, List<SofaChangeAnnotation>>();
    // NOTE(review): lower-casing uses the default locale and the resulting offsets are
    // applied to the original text - this assumes lower-casing never changes the length.
    String coveredText = jcas.getDocumentText().toLowerCase();

    List<SofaChangeAnnotation> scaChangesList = new ArrayList<SofaChangeAnnotation>();
    for (Map.Entry<String, String> entry : replacementMap.entrySet()) {
        String replacementKey = entry.getKey().toLowerCase();
        String replacementValue = targetSurroundings + entry.getValue() + targetSurroundings;
        String regex = srcSurroundingsStart + "(" + Pattern.quote(replacementKey) + ")" + srcSurroundingsEnd;
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(coveredText);

        // The key is group 1 when it is the only group; otherwise it is taken to be group 2
        // (presumably because the leading surrounding contributes one group - verify).
        int groupNumberOfKey = (matcher.groupCount() == 1) ? 1 : 2;
        while (matcher.find()) {
            SofaChangeAnnotation sca = new SofaChangeAnnotation(jcas);
            sca.setBegin(matcher.start(groupNumberOfKey));
            sca.setEnd(matcher.end(groupNumberOfKey));
            sca.setOperation(OP_REPLACE);
            sca.setValue(replacementValue);
            scaChangesList.add(sca);
        }
    }
    changesMap.put(1, scaChangesList);
    return changesMap;
}
From source file:com.edgenius.wiki.render.filter.LinkFilter.java
public List<Region> getRegions(CharSequence input) { final List<Region> list = new ArrayList<Region>(); regexProvider.replaceByTokenVisitor(input, new TokenVisitor<Matcher>() { public void handleMatch(StringBuffer buffer, Matcher matcher) { int contentStart = matcher.start(1); int contentEnd = matcher.end(1); int start = contentStart - 1; int end = contentEnd + 1; String full = matcher.group(1); int sep; //link has possible 2 Region, [view>link], entire text is immutable region, but view is mutable. if ((sep = StringUtil.indexSeparatorWithoutEscaped(full, ">")) != -1) { //entire is immutable Region bodyRegion = new Region(LinkFilter.this, true, start, end, contentStart, contentEnd); //view part is normal mutable, it needs independent render Region viewPartRegion = new Region(LinkFilter.this, false, contentStart, contentStart + sep, contentStart, contentStart + sep); bodyRegion.setSubRegion(viewPartRegion); list.add(bodyRegion);/*from w w w. j a va 2 s. c o m*/ } else { //[viewAsLink] only 1 region, and it is immutable list.add(new Region(LinkFilter.this, true, start, end, contentStart, contentEnd)); } } }); return list; }
From source file:net.healeys.lexic.online.OnlineGame.java
public boolean start() { Pattern pat = Pattern.compile("(\\w+):(.+)"); for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { try {/*from w w w.j a va2 s. c o m*/ HttpClient httpClient = new DefaultHttpClient(); HttpGet get = new HttpGet(uri); addHeaders(get); HttpResponse resp = httpClient.execute(get); BufferedReader br = new BufferedReader(new InputStreamReader(resp.getEntity().getContent())); String line; while ((line = br.readLine()) != null) { // Log.d(TAG,"line:"+line); Matcher mat = pat.matcher(line); if (mat.find()) { String key = line.substring(mat.start(1), mat.end(1)); String value = line.substring(mat.start(2), mat.end(2)); // Log.d(TAG,"key:"+key); // Log.d(TAG,"value:"+value); if (key.equals("board")) { String[] letters = value.split(","); if (letters.length == 16) { setBoard(new FourByFourBoard(letters)); } else if (letters.length == 25) { setBoard(new FiveByFiveBoard(letters)); } } else if (key.equals("id")) { id = Integer.parseInt(value); } else { urls.put(key, value); } } } super.start(); return true; } catch (Exception e) { // Log.e(TAG,"Connection Error in constructor",e); } } super.start(); return false; }
From source file:com.joliciel.talismane.filters.SentenceHolderImpl.java
/**
 * Splits this holder's text at its sentence boundaries and returns one
 * {@link Sentence} per segment.
 *
 * <p>If {@code leftover} is non-null, its text is prepended to the first
 * segment and the leftover object itself is reused as that sentence. Text
 * after the last boundary (or the whole text when no boundary exists)
 * becomes a final sentence marked as not complete. Opening white space,
 * closing white space (complete sentences only) and duplicate white space
 * are dropped from each sentence's text.
 *
 * @param leftover incomplete sentence to continue, or {@code null}
 * @return the sentences built from this holder's text, in text order
 */
@Override
public List<Sentence> getDetectedSentences(Sentence leftover) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("getDetectedSentences. leftover=" + leftover);
    }
    List<Sentence> sentences = new ArrayList<Sentence>();

    // index in this holder's text where the next segment starts
    int currentIndex = 0;
    // with no boundaries at all, any non-empty text is a leftover
    boolean haveLeftOvers = this.getText().length() > 0;
    if (this.sentenceBoundaries.size() > 0) {
        haveLeftOvers = false;
        // first element of the descending iterator, i.e. the last boundary
        int lastSentenceBoundary = this.sentenceBoundaries.descendingIterator().next();
        // text continues past the last boundary: that tail is a leftover
        if (lastSentenceBoundary < this.getText().length() - 1) {
            haveLeftOvers = true;
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("haveLeftOvers? " + lastSentenceBoundary + " < " + (this.getText().length() - 1) + " = "
                    + haveLeftOvers);
        }
    }

    // treat the last character of the text as an extra boundary for the leftover tail
    List<Integer> allBoundaries = new ArrayList<Integer>(this.sentenceBoundaries);
    if (haveLeftOvers)
        allBoundaries.add(this.getText().length() - 1);

    for (int sentenceBoundary : allBoundaries) {
        boolean isLeftover = haveLeftOvers && sentenceBoundary == this.getText().length() - 1;

        Sentence sentence = filterService.getSentence();
        // number of leading characters of 'text' that come from the incoming leftover
        int leftOverTextLength = 0;
        String text = "";
        if (leftover != null) {
            // continue the incoming leftover; it is consumed by the first segment only
            sentence = leftover;
            leftOverTextLength = leftover.getText().length();
            text = leftover.getText() + this.getText().substring(currentIndex, sentenceBoundary + 1);
            leftover = null;
        } else {
            text = this.getText().substring(currentIndex, sentenceBoundary + 1);
        }

        // handle trim & duplicate white space here
        Matcher matcherOpeningWhiteSpace = openingWhiteSpacePattern.matcher(text);
        int openingWhiteSpaceEnd = 0;
        if (matcherOpeningWhiteSpace.find()) {
            openingWhiteSpaceEnd = matcherOpeningWhiteSpace.end(1);
        }

        // closing white space is only trimmed when the sentence is complete
        int closingWhiteSpaceStart = text.length();
        if (!isLeftover) {
            Matcher matcherClosingWhiteSpace = closingWhiteSpacePattern.matcher(text);
            if (matcherClosingWhiteSpace.find()) {
                closingWhiteSpaceStart = matcherClosingWhiteSpace.start(1);
            }
        }

        // positions (in 'text') of white space characters to drop
        Matcher matcherDuplicateWhiteSpace = duplicateWhiteSpacePattern.matcher(text);
        Set<Integer> duplicateWhiteSpace = new HashSet<Integer>();
        while (matcherDuplicateWhiteSpace.find()) {
            // remove all white space barring the first
            for (int i = matcherDuplicateWhiteSpace.start() + 1; i < matcherDuplicateWhiteSpace.end(); i++) {
                duplicateWhiteSpace.add(i);
            }
        }

        StringBuilder sb = new StringBuilder();
        // 'j' walks 'text' (leftover prefix included); 'i' walks this holder's own text
        // and only advances once 'j' is past the leftover prefix
        int i = currentIndex;
        for (int j = 0; j < text.length(); j++) {
            boolean appendLetter = false;
            if (j < openingWhiteSpaceEnd) {
                // do nothing
            } else if (j >= closingWhiteSpaceStart) {
                // do nothing
            } else if (duplicateWhiteSpace.contains(j)) {
                // do nothing
            } else {
                appendLetter = true;
            }

            if (j >= leftOverTextLength) {
                // if we're past the leftovers and onto the new stuff
                if (appendLetter)
                    sentence.addOriginalIndex(this.getOriginalIndexes().get(i));

                // original text segments are re-keyed to the current length of the cleaned text
                if (this.getOriginalTextSegments().containsKey(i))
                    sentence.getOriginalTextSegments().put(sb.length(), this.getOriginalTextSegments().get(i));

                i++;
            }

            if (appendLetter)
                sb.append(text.charAt(j));
        }

        sentence.setText(sb.toString());
        if (LOG.isTraceEnabled()) {
            LOG.trace("sentence.setText |" + sentence.getText() + "|");
        }

        sentence.setComplete(!isLeftover);

        // every sentence receives all newline locations known to this holder
        for (Entry<Integer, Integer> newlineLocation : this.newlines.entrySet()) {
            sentence.addNewline(newlineLocation.getKey(), newlineLocation.getValue());
        }

        sentence.setFileName(this.getFileName());

        sentences.add(sentence);
        currentIndex = sentenceBoundary + 1;
    }

    return sentences;
}
From source file:fr.eurecom.nerd.core.proxy.ExtractivClient.java
private List<TEntity> parse(String text, String serviceKey, OntologyType otype) { List<TEntity> result = new LinkedList<TEntity>(); URI endpoint;// ww w . ja v a 2 s .c om try { endpoint = new URI(EXTRACTIV_SERVER_LOCATION); HttpMethodBase extractivRequest = getExtractivProcessString(endpoint, text, serviceKey); InputStream extractivResults = fetchHttpRequest(extractivRequest); Readable jsonReadable = new InputStreamReader(extractivResults); ExtractivJSONParser jsonParser = new ExtractivJSONParser(jsonReadable); Map<String, Integer> map = new HashMap<String, Integer>(); for (Document document : jsonParser) for (com.extractiv.Entity item : document.getEntities()) { String label = item.asString(); String type = item.getType(); String nerdType = OntoFactory.mapper.getNerdType(otype, label, SOURCE, type).toString(); String uri = (item.getLinks().size() > 0) ? item.getLinks().get(0) : "null"; // Integer startChar = item.getOffset(); // Integer endChar = startChar + item.getCharLength(); // TEntity extraction = new TEntity(label, type, uri, nerdType, // startChar, endChar, confidence, SOURCE); // result.add(extraction); //logic to compute the startchar and endchar of the entity within the text Integer startchar = null, endchar = null; if (map.containsKey(label)) { int value = map.get(label); map.remove(label); map.put(label, new Integer(value + 1)); } else map.put(label, new Integer(1)); try { Pattern p = Pattern.compile("\\b" + label + "\\b"); Matcher m = p.matcher(text); for (int j = 0; j < map.get(label) && m.find(); j++) { startchar = m.start(0); endchar = m.end(0); if (containsAtIndex(result, startchar, endchar)) j--; } Double confidence = 0.5; if (startchar != null && endchar != null) { TEntity extraction = new TEntity(label, type, uri, nerdType.toString(), startchar, endchar, confidence, SOURCE); result.add(extraction); } } catch (PatternSyntaxException eregex) { eregex.printStackTrace(); } } } catch (URISyntaxException e) { e.printStackTrace(); } catch 
(BadInputException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } return result; }
From source file:com.dreamlinx.automation.DINRelay.java
/** * Creates an HttpClient to communicate with the DIN relay. * @throws MalformedURLException/* w w w.j a v a2s . c om*/ * @throws HttpException * @throws IOException */ private void setupHttpClient() throws MalformedURLException, HttpException, IOException { httpClient = new HttpClient(); httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY); GetMethod getMethod = new GetMethod("http://" + ipAddress); int result = httpClient.executeMethod(getMethod); if (result != 200) { throw new HttpException(result + " - " + getMethod.getStatusText()); } String response = getMethod.getResponseBodyAsString(); getMethod.releaseConnection(); String regex = "name=\"Challenge\" value=\".*\""; Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); Matcher matcher = pattern.matcher(response); String challenge = ""; while (matcher.find()) { int start = matcher.start(0); int end = matcher.end(0); challenge = response.substring(start + 24, end - 1); } String md5Password = challenge + username + password + challenge; md5Password = toMD5(md5Password); PostMethod postMethod = new PostMethod("http://" + ipAddress + "/login.tgi"); postMethod.addParameter("Username", username); postMethod.addParameter("Password", md5Password); result = httpClient.executeMethod(postMethod); if (result != 200) { throw new HttpException(result + " - " + postMethod.getStatusText()); } postMethod.releaseConnection(); }
From source file:ch.sourcepond.maven.release.pom.VersionTransferWriter.java
@Override public void close() throws IOException { final Matcher matcher = VERSION_PATTERN.matcher(toString()); final Matcher originalMatcher = VERSION_PATTERN.matcher(original); int originalIdx = 0; int startIdx = 0; while (find(matcher, originalMatcher, originalIdx)) { final String newVersion = matcher.group(VERSION_VALUE); startIdx = originalMatcher.start(VERSION_VALUE); original.replace(startIdx, originalMatcher.end(VERSION_VALUE), newVersion); originalIdx = startIdx + newVersion.length(); }//from w w w . ja va2 s .co m try (final Writer writer = new BufferedWriter(new FileWriter(file))) { writer.write(original.toString()); } }