Usage examples for org.apache.commons.lang3.StringUtils.stripEnd
public static String stripEnd(final String str, final String stripChars)
Strips any of a set of characters from the end of a String.
A null input String returns null.
From source file:org.wrml.runtime.schema.ProtoValueSource.java
/** * Convert the specified string value into a value that is compatible with the reference slot's type. * * @param stringValue The {@link String} value to coerce into a compatible value. * @param <T> The generic return type that enables the caller to omit the cast operator. * @return The converted value of the specified string value. *//*from w ww . j a va2s . c o m*/ private <T> T coerceStringValue(final String stringValue) { if (stringValue == null || _ReferenceProtoSlot == null) { return (T) stringValue; } final Context context = _ReferenceProtoSlot.getContext(); final SyntaxLoader syntaxLoader = context.getSyntaxLoader(); final Type referenceSlotType = _ReferenceProtoSlot.getHeapValueType(); if (ValueType.isListType(referenceSlotType)) { // [a, b, c] String listString = stringValue.trim(); listString = StringUtils.stripStart(listString, "["); listString = StringUtils.stripEnd(listString, "]"); if (listString.isEmpty()) { return (T) Collections.EMPTY_LIST; } final Type elementType = ValueType.getListElementType(referenceSlotType); final String[] listElementsStringArray = StringUtils.split(listString, ","); final List<Object> listValue = new ArrayList<>(listElementsStringArray.length); for (final String elementString : listElementsStringArray) { final Object element = syntaxLoader.parseSyntacticText(elementString.trim(), elementType); listValue.add(element); } return (T) listValue; } else { final Object value = syntaxLoader.parseSyntacticText(stringValue, referenceSlotType); return (T) value; } }
From source file:software.coolstuff.springframework.owncloud.service.impl.rest.OwncloudRestResourceServiceImpl.java
/**
 * Joins a URL and an optional path suffix with exactly one separator between them.
 *
 * @param url    the base URL
 * @param suffix the path to append; when blank, the URL is returned unchanged
 * @return the textual URL, followed by {@code SLASH} and the suffix when a suffix is given
 */
protected String appendOptionalSuffix(URL url, String suffix) {
    final String base = url.toString();
    if (StringUtils.isBlank(suffix)) {
        return base;
    }
    // Drop separators on both sides of the seam so that exactly one remains.
    final String head = StringUtils.stripEnd(base, SLASH);
    final String tail = StringUtils.stripStart(suffix, SLASH);
    return head + SLASH + tail;
}
From source file:tpt.dbweb.cat.io.TaggedTextXMLReader.java
private Iterator<TaggedText> getIterator(InputStream is, String errorMessageInfo) { XMLStreamReader tmpxsr = null; try {/*from w w w .ja va2s . c o m*/ XMLInputFactory xif = XMLInputFactory.newInstance(); xif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); xif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); xif.setProperty(XMLInputFactory.IS_VALIDATING, false); tmpxsr = xif.createXMLStreamReader(is); } catch (XMLStreamException | FactoryConfigurationError e) { e.printStackTrace(); return null; } final XMLStreamReader xsr = tmpxsr; return new PeekIterator<TaggedText>() { @Override protected TaggedText internalNext() { ArrayList<TextSpan> openMarks = new ArrayList<>(); StringBuilder pureTextSB = new StringBuilder(); ArrayList<TextSpan> marks = new ArrayList<>(); marks.add(new TextSpan(null, 0, 0)); TaggedText tt = null; try { loop: while (xsr.hasNext()) { xsr.next(); int event = xsr.getEventType(); switch (event) { case XMLStreamConstants.START_ELEMENT: if ("articles".equals(xsr.getLocalName())) { } else if ("article".equals(xsr.getLocalName())) { tt = new TaggedText(); for (int i = 0; i < xsr.getAttributeCount(); i++) { if ("id".equals(xsr.getAttributeLocalName(i))) { tt.id = xsr.getAttributeValue(i); } tt.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i)); } } else if ("mark".equals(xsr.getLocalName())) { TextSpan tr = new TextSpan(null, pureTextSB.length(), pureTextSB.length()); for (int i = 0; i < xsr.getAttributeCount(); i++) { tr.info().put(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i)); } openMarks.add(tr); } else if ("br".equals(xsr.getLocalName())) { // TODO: how to propagate tags from the input to the output? 
} else { log.warn("ignore tag " + xsr.getLocalName()); } break; case XMLStreamConstants.END_ELEMENT: if ("mark".equals(xsr.getLocalName())) { // search corresponding <mark ...> TextSpan tr = openMarks.remove(openMarks.size() - 1); if (tr == null) { log.warn("markend at " + xsr.getLocation().getCharacterOffset() + " has no corresponding mark tag"); break; } tr.end = pureTextSB.length(); marks.add(tr); } else if ("article".equals(xsr.getLocalName())) { tt.text = StringUtils.stripEnd(pureTextSB.toString().trim(), " \t\n"); pureTextSB = new StringBuilder(); tt.mentions = new ArrayList<>(); for (TextSpan mark : marks) { String entity = mark.info().get("entity"); if (entity == null) { entity = mark.info().get("annotation"); } if (entity != null) { EntityMention e = new EntityMention(tt.text, mark.start, mark.end, entity); String minMention = mark.info().get("min"); String mention = e.getMention(); if (minMention != null && !"".equals(minMention)) { Pattern p = Pattern.compile(Pattern.quote(minMention)); Matcher m = p.matcher(mention); if (m.find()) { TextSpan min = new TextSpan(e.text, e.start + m.start(), e.start + m.end()); e.min = min; if (m.find()) { log.warn("found " + minMention + " two times in \"" + mention + "\""); } } else { String prefix = Utility.findLongestPrefix(mention, minMention); log.warn("didn't find min mention '" + minMention + "' in text '" + mention + "', longest prefix found: '" + prefix + "' in article " + tt.id); } } mark.info().remove("min"); mark.info().remove("entity"); if (mark.info().size() > 0) { e.info().putAll(mark.info()); } tt.mentions.add(e); } } openMarks.clear(); marks.clear(); break loop; } break; case XMLStreamConstants.CHARACTERS: String toadd = xsr.getText(); if (pureTextSB.length() == 0) { toadd = StringUtils.stripStart(toadd, " \t\n"); } if (toadd.contains("thanks")) { log.info("test"); } pureTextSB.append(toadd); break; } } } catch (XMLStreamException e) { log.error("{}", errorMessageInfo); throw new RuntimeException(e); } if 
(tt != null && tt.mentions != null) { tt.mentions.sort(null); } return tt; } }; }
From source file:tpt.dbweb.cat.tools.RegexWordTokenizer.java
@Override public List<TextSpan> getTokens(String text) { // split text and iterate over its parts String[] parts = pattern.split(text); List<TextSpan> result = new ArrayList<>(); int pos = 0;/* w ww.j a v a 2 s . c o m*/ for (int i = 0; i < parts.length; i++) { String part = parts[i]; int nextPos = pos + part.length(); try { // remove whitespace String ltrim = StringUtils.stripStart(part, null); String trim = StringUtils.stripEnd(ltrim, null); int start = pos + (part.length() - ltrim.length()); int end = start + trim.length(); // only add non-empty text spans if (start < end) { result.add(new TextSpan(text, start, end)); } } finally { pos = nextPos; } } return result; }
From source file:URLTree.FindOptimalPath.java
public void printMap(List<Node> node, int stage, String outputFile, int threshold, String ouptputFrequencyFile, String filed3) {/*w ww .j a v a 2 s .c o m*/ Map<String, Integer> countSimilarNode = new HashMap<String, Integer>(); String nodeMatrix[][] = new String[node.size()][stage]; for (int i = 0; i < node.size(); i++) { //System.out.println(Arrays.toString(node.get(i).getNodeArr())); String arr[] = node.get(i).getNodeArr(); for (int j = 0; j < arr.length; j++) { if (j < stage) { nodeMatrix[i][j] = arr[j]; } } } List<MergeSimilarNode> similarNode = new ArrayList<MergeSimilarNode>(); Map<String, Integer> wordMap = new HashMap<String, Integer>(); for (int i = 0; i < node.size(); i++) { for (int j = 0; j < stage; j++) { if (nodeMatrix[i][j] != null) { FreqWords nodeFreq = new FreqWords(); System.out.print("[" + i + j + "]: " + nodeMatrix[i][j]); if (wordMap.containsKey(nodeMatrix[i][j] + "," + j)) { wordMap.put(nodeMatrix[i][j] + "," + j, wordMap.get(nodeMatrix[i][j] + "," + j) + 1); } else { wordMap.put(nodeMatrix[i][j] + "," + j, 1); } } } System.out.println(); } List<FreqWords> freq = new ArrayList<FreqWords>(); for (Map.Entry<String, Integer> entry : wordMap.entrySet()) { FreqWords fwords = new FreqWords(); String key = entry.getKey(); Integer value = entry.getValue(); //String nodeStr[]=key.split(","); String[] nodeStr = new String[2]; StringTokenizer st = new StringTokenizer(key, ","); int k = 0; while (st.hasMoreTokens()) { nodeStr[k] = st.nextToken(); k++; } fwords.setNode(nodeStr[0]); System.out.println("node freq: " + nodeStr[1]); if (nodeStr[1] != null) { fwords.setFreq(Integer.parseInt(nodeStr[1])); } else { System.out.println("null value at" + nodeStr[0] + ": " + nodeStr[1]); } fwords.setValue(value); freq.add(fwords); } System.out.println("Done Matrix"); //System.out.println(); PrintWriter writer = null; List<String> urlList = new ArrayList<String>(); List<FilterURL> varifiedList = new ArrayList<FilterURL>(); try { writer = new PrintWriter(outputFile, 
"UTF-8"); for (int i = 0; i < node.size(); i++) { String urlArr[] = new String[stage]; int flag = 0; int sum = 0; for (int j = 0; j < stage; j++) { if (nodeMatrix[i][j] != null) { for (FreqWords words : freq) { //System.out.println(); //if(words.getNode().equals("ca")) // System.out.println("words: "+words.getNode()+" i: "+i+"j: "+j+" value: "+nodeMatrix[i][j]+" word Freq: "+ words.getFreq()); if (words.getNode().equals(nodeMatrix[i][j])) { //if(words.getFreq()!=0) //{ if (words.getFreq() == j) { int count = node.get(i).getCount(); //url=url.append(nodeMatrix[i][j]).append("(").append(value+node.get(i).getCount()).append(")-"); if (threshold > words.getValue()) { flag = 1; break; } else { sum += count; urlArr[j] = nodeMatrix[i][j]; // System.out.println("i: "+i+"j: "+j+" found: "+urlArr[j]); break; //System.out.print(nodeMatrix[i][j]+"("+value+")"); } } //} } } } if (flag == 1) { flag = 0; break; } } //String urldata=StringUtils.join(urlArr,"-"); String urldata = Joiner.on("-").skipNulls().join(urlArr).trim(); //System.out.println("Mearge URL"+urldata); if (urldata.endsWith("-")) { urldata = urldata.substring(0, urldata.length() - 1); } urldata = StringUtils.stripEnd(urldata, null); // System.out.print(urldata); if (!urldata.isEmpty()) { if (urldata.contains("-")) { // writer.println(urldata+","+sum); FilterURL filter = new FilterURL(); filter.setReversedURL(urldata); filter.setCount(sum); varifiedList.add(filter); } } //System.out.println(i); // System.out.println(i); } //varifiedSequece(varifiedList,writer); for (int i = 0; i < varifiedList.size() - 1; i++) { if (varifiedList.get(i).getReversedURL().contains(varifiedList.get(i + 1).getReversedURL())) { } else { if (threshold != 0) { String nodesName[] = varifiedList.get(i).getReversedURL().split("-"); if (nodesName.length == 2) { try { String url2Domain = "http://" + nodesName[1] + "." 
+ nodesName[0]; URL url = new URL(url2Domain); // open connection HttpURLConnection httpURLConnection = (HttpURLConnection) url .openConnection(Proxy.NO_PROXY); // stop following browser redirect httpURLConnection.setInstanceFollowRedirects(false); httpURLConnection.setConnectTimeout(15000); httpURLConnection.setReadTimeout(15000); // extract location header containing the actual destination URL String expandedURL = httpURLConnection.getHeaderField("Location"); httpURLConnection.disconnect(); if (expandedURL != null) { System.out.println("Correct: " + expandedURL); writer.println(varifiedList.get(i).getReversedURL() + "," + varifiedList.get(i).getCount()); } } catch (Exception e) { System.out.println("Incorrect: " + e); } } else { writer.println( varifiedList.get(i).getReversedURL() + "," + varifiedList.get(i).getCount()); } } else { writer.println(varifiedList.get(i).getReversedURL() + "," + varifiedList.get(i).getCount()); } } } } catch (Exception e) { e.printStackTrace(); System.out.println(e); } finally { if (writer != null) { writer.close(); // **** closing it flushes it and reclaims resources **** } } System.out.println("Write into File->" + outputFile); Path obj = new Path(); obj.finalFrequency(outputFile, ouptputFrequencyFile); // obj.preSunbrust(ouptputFrequencyFile,filed3); }