List of usage examples for org.apache.commons.lang.StringUtils.isAlphanumeric
public static boolean isAlphanumeric(String str)
Checks if the String contains only Unicode letters or digits.
From source file:gov.nih.nci.evs.browser.utils.DataUtils.java
public static String encode_term(String s) { if (s == null) return null; if (StringUtils.isAlphanumeric(s)) return s; StringBuilder buf = new StringBuilder(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c == 60) { buf.append("< "); } else if (c == 62) { buf.append(">"); } else if (c == 38) { buf.append("&"); } else if (c == 32) { buf.append(" "); } else {//from w ww .j a v a 2 s. c om buf.append(c); } } String t = buf.toString(); return t; }
From source file:opennlp.tools.apps.object_dedup.SimilarityAccessorBase.java
/**
 * Returns the tokens of {@code list} that survive three filters, in their original order.
 *
 * <p>A token is dropped when it matches the dollar-amount pattern, when
 * {@code StringUtils.isAlphanumeric} rejects it, or when it is shorter than three
 * characters and {@code StringUtils.isAlpha} accepts it.
 *
 * @param list tokens to filter
 * @return a new list holding the retained tokens
 */
protected List<String> removeDollarWordAndNonAlphaFromList(List<String> list) {
    Pattern dollarAmount = Pattern.compile("^\\$(\\d{1,3}(\\,\\d{3})*|(\\d+))(\\.\\d{2})?$");
    List<String> kept = new ArrayList<String>();
    for (String token : list) {
        if (dollarAmount.matcher(token).find()) {
            continue;
        }
        if (!StringUtils.isAlphanumeric(token)) {
            continue;
        }
        // Short tokens are only kept when they are not purely alphabetic.
        if (token.length() < 3 && StringUtils.isAlpha(token)) {
            continue;
        }
        kept.add(token);
    }
    return kept;
}
From source file:opennlp.tools.apps.object_dedup.SimilarityAccessorBase.java
public List<String> removeVenuePart(ArrayList<String> toks) { List<String> results = new ArrayList<String>(); boolean bVenuePart = false; for (String word : toks) { // beginning of venue part if (word.equals("at") || word.equals("@")) bVenuePart = true;/*ww w . j a va 2 s. c om*/ // end of venue part if (!StringUtils.isAlphanumeric(word) || word.startsWith("<punc")) bVenuePart = false; if (!bVenuePart && !word.startsWith("<punc")) results.add(word); } return results; }
From source file:opennlp.tools.apps.object_dedup.SimilarityAccessorBase.java
protected boolean isCapitalized(String lookup) { String[] titleWords = lookup.split(" "); int count = 0; for (String word : titleWords) { if (word.length() < 2) // '-', '|', ':' break; if (word.equals(word.toLowerCase()) && (!Arrays.asList(englishPrepositions).contains(word)) && word.length() > 3 && StringUtils.isAlphanumeric(word)) continue; // was return false; if (count > 3) break; count++;//from w ww . j a va 2 s .c o m } return true; }
From source file:opennlp.tools.parse_thicket.opinion_processor.StopList.java
/**
 * Filters a list of user "likes" and separates out candidate category names.
 *
 * <p>The result always holds two lists: index 0 is the list of likes, index 1 is
 * {@code potentialCategs}. When {@code userLikes} has fewer than six entries, or when
 * filtering leaves at most one distinct entry, index 0 is the unmodified {@code userLikes}.
 *
 * <p>Each like is lower-cased and then dropped when it:
 * <ul>
 *   <li>contains characters other than letters, digits and spaces,</li>
 *   <li>is purely numeric,</li>
 *   <li>is shorter than four characters,</li>
 *   <li>is a single word, all-common-English and without a first name (this one is added
 *       to {@code potentialCategs}),</li>
 *   <li>has fewer than three words, all-common-English and without a first name,</li>
 *   <li>is a single word that is a first name.</li>
 * </ul>
 * Surviving likes are stored with stop words and words shorter than three characters
 * removed, and duplicates are collapsed.
 *
 * @param userLikes raw likes
 * @return a two-element list: filtered (or original) likes, then potential categories
 */
public static List<List<String>> preFilterCommonEnglishExpressions(List<String> userLikes) {
    List<List<String>> results = new ArrayList<List<String>>();
    List<String> resultUserLikes = new ArrayList<String>();
    List<String> potentialCategs = new ArrayList<String>();
    if (userLikes.size() < 6) { // too short, do not filter
        results.add(userLikes);
        results.add(potentialCategs);
        return results;
    }

    for (String rawLike : userLikes) {
        String like = rawLike.toLowerCase();
        if (!StringUtils.isAlphanumeric(like.replace(" ", ""))) {
            logger.info("removed isAlphanumeric " + like);
            continue;
        }
        if (StringUtils.isNumeric(like)) {
            logger.info("removed isNumericSpace " + like);
            continue;
        }
        if (like.length() < 4) {
            logger.info("removed too short likes " + like);
            continue;
        }

        boolean existFirstName = false;
        boolean allWordsCommonEnglish = true;
        String[] comps = like.split(" ");
        StringBuilder buf = new StringBuilder();
        for (String word : comps) {
            if (!isCommonWord(word)) {
                allWordsCommonEnglish = false;
            }
            if (isFirstName(word)) {
                existFirstName = true;
            }
            // Stop words and very short words are left out of the stored form.
            if (!isStopWord(word) && word.length() >= 3) {
                buf.append(word).append(' ');
            }
        }

        boolean onlyCommonWords = !existFirstName && allWordsCommonEnglish;
        // The single-word case must be tested before the "fewer than three words" case:
        // in the opposite order it can never be reached and potentialCategs stays empty.
        if (onlyCommonWords && comps.length == 1) {
            logger.info("moved to category: NoFirstName+AllCommonEng+Short1word " + like);
            potentialCategs.add(like);
            continue;
        }
        if (onlyCommonWords && comps.length < 3) {
            logger.info("moved to category: NoFirstName+AllCommonEng+ShorterThan3 " + like);
            continue;
        }
        if (existFirstName && comps.length == 1) {
            logger.info("removed : only first name, no last name " + like);
            continue;
        }

        resultUserLikes.add(buf.toString().trim());
    }

    // Collapse duplicates.
    resultUserLikes = new ArrayList<String>(new HashSet<String>(resultUserLikes));
    if (resultUserLikes.size() > 1) {
        results.add(resultUserLikes);
    } else { // do not do reduction
        results.add(userLikes);
    }
    results.add(potentialCategs);
    return results;
}
From source file:opennlp.tools.parse_thicket.opinion_processor.StopList.java
public static boolean isAcceptableIndividualLikes(String like) { StopList finder = StopList.getInstance(); like = like.toLowerCase();/*from w ww .j av a 2 s. co m*/ if (!StringUtils.isAlphanumeric(like.replace(" ", ""))) { logger.info("removed isAlphanumeric " + like); return false; } if (StringUtils.isNumeric(like)) { logger.info("removed isNumericSpace " + like); return false; } if (like.length() < 4) { logger.info("removed too short likes " + like); return false; } boolean existFirstName = false, allWordsCommonEnglish = true, bStop = false; String[] comps = like.split(" "); StringBuffer buf = new StringBuffer(); for (String word : comps) { boolean isCommon = finder.isCommonWord(word); boolean isName = finder.isFirstName(word); if (!isCommon) allWordsCommonEnglish = false; if (isName) existFirstName = true; if (finder.isStopWord(word) || word.length() < 3) bStop = true; else buf.append(word + " "); } // / does not have to include stop word if (!existFirstName && allWordsCommonEnglish && comps.length < 3) { logger.info(" NoFirstName+AllCommonEng+ShorterThan3 " + like); return false; } if (!existFirstName && allWordsCommonEnglish && comps.length == 1) { logger.info(" NoFirstName+AllCommonEng+Short1word " + like); return false; } if (existFirstName && comps.length == 1) { logger.info("removed : only first name, no last name " + like); return false; } return true; }
From source file:opennlp.tools.similarity.apps.solr.WordDocBuilderEndNotes.java
public String buildWordDoc(List<HitBase> content, String title) { String outputDocFinename = absPath + "written/" + title.replace(' ', '_').replace('\"', ' ').trim() + ".docx"; WordprocessingMLPackage wordMLPackage = null; List<String> imageURLs = getAllImageSearchResults(title); int count = 0; BigInteger refId = BigInteger.ONE; try {/* w ww.ja v a 2 s . c o m*/ wordMLPackage = WordprocessingMLPackage.createPackage(); CTEndnotes endnotes = null; try { EndnotesPart ep = new EndnotesPart(); endnotes = Context.getWmlObjectFactory().createCTEndnotes(); ep.setJaxbElement(endnotes); wordMLPackage.getMainDocumentPart().addTargetPart(ep); } catch (InvalidFormatException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Title", title.toUpperCase()); for (HitBase para : content) { if (para.getFragments() == null || para.getFragments().size() < 1) // no found content in this hit continue; try { String processedParaTitle = processParagraphTitle(para.getTitle()); if (processedParaTitle != null && !processedParaTitle.endsWith("..") || StringUtils.isAlphanumeric(processedParaTitle)) { wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle", processedParaTitle); } String paraText = processParagraphText(para.getFragments().toString()); wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText); CTFtnEdn endnote = Context.getWmlObjectFactory().createCTFtnEdn(); endnotes.getEndnote().add(endnote); endnote.setId(refId); refId.add(BigInteger.ONE); String url = para.getUrl(); String endnoteBody = "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" ><w:pPr><w:pStyle w:val=\"EndnoteText\"/></w:pPr><w:r><w:rPr>" + "<w:rStyle w:val=\"EndnoteReference\"/></w:rPr><w:endnoteRef/></w:r><w:r><w:t xml:space=\"preserve\"> " + url + "</w:t></w:r></w:p>"; try { endnote.getEGBlockLevelElts().add(XmlUtils.unmarshalString(endnoteBody)); } catch (JAXBException e) { // TODO 
Auto-generated catch block e.printStackTrace(); } // Add the body text referencing it String docBody = "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" ><w:r><w:t>"//+ paraText /*+ refId.toString()*/ + "</w:t></w:r><w:r><w:rPr><w:rStyle w:val=\"EndnoteReference\"/></w:rPr><w:endnoteReference w:id=\"" + refId.toString() + "\"/></w:r></w:p>"; try { wordMLPackage.getMainDocumentPart().addParagraph(docBody); } catch (JAXBException e) { // TODO Auto-generated catch block e.printStackTrace(); } try { addImageByImageURLToPackage(count, wordMLPackage, imageURLs); } catch (Exception e) { // no need to report issues //e.printStackTrace(); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } count++; } // now add URLs wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle", "REFERENCES"); for (HitBase para : content) { if (para.getFragments() == null || para.getFragments().size() < 1) // no found content in this hit continue; try { wordMLPackage.getMainDocumentPart().addStyledParagraphOfText("Subtitle", para.getTitle()); String paraText = para.getUrl(); wordMLPackage.getMainDocumentPart().addParagraphOfText(paraText); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } try { wordMLPackage.save(new File(outputDocFinename)); System.out.println("Finished creating docx =" + outputDocFinename); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } try { String fileNameToDownload = "/var/www/wrt_latest/" + title.replace(' ', '_').replace('\"', ' ').trim() + ".docx"; wordMLPackage.save(new File(fileNameToDownload)); System.out.println("Wrote a doc for download :" + fileNameToDownload); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return outputDocFinename; }
From source file:org.apache.cloudstack.storage.datastore.driver.SolidfirePrimaryDataStoreDriver.java
private String getSolidFireVolumeName(String strCloudStackVolumeName) { final String specialChar = "-"; StringBuilder strSolidFireVolumeName = new StringBuilder(); for (int i = 0; i < strCloudStackVolumeName.length(); i++) { String strChar = strCloudStackVolumeName.substring(i, i + 1); if (StringUtils.isAlphanumeric(strChar)) { strSolidFireVolumeName.append(strChar); } else {//from w w w .ja v a2s .c o m strSolidFireVolumeName.append(specialChar); } } return strSolidFireVolumeName.toString(); }
From source file:org.apache.cloudstack.storage.datastore.util.SolidFireUtil.java
public static String getSolidFireVolumeName(String strCloudStackVolumeName) { final String specialChar = "-"; StringBuilder strSolidFireVolumeName = new StringBuilder(); for (int i = 0; i < strCloudStackVolumeName.length(); i++) { String strChar = strCloudStackVolumeName.substring(i, i + 1); if (StringUtils.isAlphanumeric(strChar)) { strSolidFireVolumeName.append(strChar); } else {//w w w .j a va 2s . c o m strSolidFireVolumeName.append(specialChar); } } return strSolidFireVolumeName.toString(); }
From source file:org.apache.roller.weblogger.ui.rendering.model.UtilitiesModel.java
/**
 * Template-facing wrapper that delegates to {@code StringUtils.isAlphanumeric}.
 *
 * @param str the string to check
 * @return whatever {@code StringUtils.isAlphanumeric(str)} returns
 */
public boolean isAlphanumeric(String str) {
    final boolean onlyLettersOrDigits = StringUtils.isAlphanumeric(str);
    return onlyLettersOrDigits;
}