List of usage examples for java.lang.Character.isLetter
public static boolean isLetter(int codePoint)
From source file:org.semanticweb.owlapi.model.IRI.java
/**
 * Tells whether a character falls outside the permitted set.
 *
 * <p>Permitted characters are those for which {@link Character#isLetter(char)} or
 * {@link Character#isDigit(char)} is true, plus {@code '.'}, {@code '+'} and {@code '-'}.
 *
 * @param ch the character to test
 * @return {@code true} if {@code ch} is not permitted, {@code false} otherwise
 */
protected boolean disallowed(char ch) {
    if (Character.isLetter(ch) || Character.isDigit(ch)) {
        return false;
    }
    switch (ch) {
        case '.':
        case '+':
        case '-':
            return false;
        default:
            return true;
    }
}
From source file:eu.crisis_economics.configuration.FromFileConfigurationContext.java
private static String removeCounterExpressions(String expression) { String result = expression;//from w ww . ja v a 2 s . co m for (int i = 0; i < result.length(); ++i) { if (result.charAt(i) == '#') { int j = i + 1; String newNameToRemove = "#"; for (; j < result.length(); ++j) { Character nextChar = result.charAt(j); if (Character.isLetter(nextChar) || nextChar == '_') newNameToRemove += nextChar; else break; } result = result.substring(0, i) + "0" + result.substring(j); j += 1 - newNameToRemove.length(); i = j; } } return result; }
From source file:org.apache.wiki.util.comparators.HumanComparator.java
/** * Very broadly characterises a character as a digit, a letter or a punctuation character. * /* w w w. j a v a 2s . com*/ * @param c <code>char</code> to be characterised * @return <code>IS_DIGIT</code> if it's a digit, <code>IS_LETTER</code> if * it's a letter, <code>IS_PUNC</code> otherwise. */ private CharType mapCharTypes(char c) { if (Character.isDigit(c)) { return CharType.TYPE_DIGIT; } else if (Character.isLetter(c)) { return CharType.TYPE_LETTER; } else { return CharType.TYPE_OTHER; } }
From source file:org.eclipse.riena.ui.ridgets.validation.AbstractValidDate.java
/**
 * Finds the position of the first character in {@code value} that is not a letter
 * according to {@link Character#isLetter(char)}.
 *
 * @param value the text to scan
 * @return the index of the first non-letter character, or {@code -1} if every character
 *         is a letter (including when {@code value} is empty)
 */
private int nextSeparatorIndex(final String value) {
    final int length = value.length();
    int index = 0;
    while (index < length && Character.isLetter(value.charAt(index))) {
        index++;
    }
    return index < length ? index : -1;
}
From source file:org.nmrfx.processor.gui.RefManager.java
public String getParString(int nDim, String indent) { ChartProcessor chartProcessor = processorController.chartProcessor; StringBuilder sBuilder = new StringBuilder(); sBuilder.append(indent);//from w ww . j av a2 s .c o m sBuilder.append("acqOrder("); sBuilder.append(chartProcessor.getAcqOrder(true)); sBuilder.append(")"); sBuilder.append(System.lineSeparator()); sBuilder.append(indent); sBuilder.append("acqarray("); sBuilder.append(chartProcessor.getArraySizes()); sBuilder.append(")"); sBuilder.append(System.lineSeparator()); for (String propName : propNames) { if (propName.equals("acqarray")) { continue; } sBuilder.append(indent); sBuilder.append(propName); sBuilder.append("("); for (int dim = 0; dim < nDim; dim++) { if (dim > 0) { sBuilder.append(","); } String value = getPropValue(dim, propName, false); boolean useString = true; // Ending with F or D allows a string to be parsed as a number if ((value.length() > 0) && !Character.isLetter(value.charAt(value.length() - 1))) { try { Double.parseDouble(value); useString = false; } catch (NumberFormatException nFE) { useString = true; } } if (propName.equals("label")) { useString = true; } if (useString) { sBuilder.append("'"); sBuilder.append(value); sBuilder.append("'"); } else { sBuilder.append(value); } } sBuilder.append(")"); sBuilder.append(System.lineSeparator()); } return sBuilder.toString(); }
From source file:com.isecpartners.gizmo.HttpResponse.java
/**
 * Reads an HTTP response from {@code in} and stores it on this object.
 *
 * <p>Header lines are read with {@code readline} until a line equal to {@code ENDL} is seen.
 * While doing so the method looks for the content-length, transfer-encoding and
 * content-encoding headers. The body is then read by content length and/or as chunked
 * data into a byte list, followed by whatever bytes remain available. Finally the byte
 * list and the header (the text before the first {@code "\r\n"} of the accumulated header
 * text) are stored via {@code setBlist} / {@code setHeader}, and the contents are added via
 * {@code addContents}: unzipped, inflated, or, when neither flag is set, the accumulated
 * header text.
 *
 * <p>An {@link IOException} during reading is logged and swallowed; the method then carries
 * on with whatever was read so far.
 *
 * @param in stream positioned at the start of the response
 * @throws FailedRequestException if {@code readline} returns {@code null} before a line
 *         equal to {@code ENDL} was read
 */
public void processResponse(InputStream in) throws FailedRequestException {
    // Accumulates the header lines exactly as returned by readline.
    StringBuffer content = new StringBuffer();
    DataInputStream inputStream = new DataInputStream(in);
    // Raw bytes of the response as collected by this method.
    ArrayByteList blist = new ArrayByteList();
    String header = null;
    int contentLength = 0;
    boolean isChunked = false;
    String line;
    try {
        line = readline(inputStream);
        // Header section: stop at end of input (null) or at the ENDL line.
        while (line != null && !line.equals(ENDL)) {
            content.append(line);
            // NOTE(review): the match is done on the upper-cased line but the offsets below are
            // taken from the original-case line; presumably CONTENT_LENGTH is upper-case, in
            // which case indexOf on a mixed-case header returns -1 — TODO confirm.
            if (line.toUpperCase().contains(CONTENT_LENGTH) && line.toUpperCase().indexOf(CONTENT_LENGTH) == 0) {
                // Value is taken from two characters past the header name up to the '\r'.
                String value = line.substring(line.indexOf(CONTENT_LENGTH) + CONTENT_LENGTH.length() + 2, line.indexOf('\r'));
                contentLength = Integer.parseInt(value.trim());
            } else if (line.toUpperCase().contains(TRANSFER_ENCODING)) {
                // Skips "Transfer-Encoding:".length() characters past the match, then looks for CHUNKED.
                if (line.toUpperCase().substring(line.toUpperCase().indexOf(TRANSFER_ENCODING) + "Transfer-Encoding:".length()).contains("CHUNKED")) {
                    isChunked = true;
                }
            } else if (line.toUpperCase().contains(CONTENT_ENCODING)) {
                // NOTE(review): same upper-case / original-case mix as for content length above.
                String value = line.substring(line.indexOf(CONTENT_ENCODING) + CONTENT_ENCODING.length() + 2, line.indexOf('\r'));
                value = value.trim();
                if (value.toUpperCase().equals("GZIP")) {
                    this.gzip = true;
                } else if (value.toUpperCase().equals("DEFLATE")) {
                    this.deflate = true;
                }
            }
            line = readline(inputStream);
        }
        // Input ended before the header terminator: log what was read and fail.
        if (line == null) {
            GizmoView.log(content.toString());
            throw new FailedRequestException();
        }
        content.append("\r\n");
        // Everything before the first CRLF; presumably the status line — verify against readline.
        header = content.substring(0, content.indexOf("\r\n"));
        append(blist, content);
        // Fixed-length body: copy exactly contentLength bytes.
        if (contentLength != 0) {
            for (int ii = 0; ii < contentLength; ii++) {
                blist.add(inputStream.readByte());
            }
        }
        if (isChunked) {
            boolean isDone = false;
            // One iteration per chunk: parse the hex size line, then copy the chunk data.
            while (!isDone) {
                byte current = inputStream.readByte();
                blist.add(current);
                int size = 0;
                // Accumulate the chunk size up to the '\n' ending the size line.
                while (current != '\n') {
                    if (current != '\r') {
                        // NOTE(review): the shift happens for every non-'\r' byte, even ones that are
                        // not hex digits, so any such byte on the size line scales the size by 16.
                        size *= 16;
                        if (Character.isLetter((char) current)) {
                            current = (byte) Character.toLowerCase((char) current);
                        }
                        if ((current >= '0') && (current <= '9')) {
                            size += (current - 48);
                        } else if ((current >= 'a') && (current <= 'f')) {
                            size += (10 + current - 97);
                        }
                    }
                    current = inputStream.readByte();
                    // Spaces on the size line are skipped and not recorded in blist.
                    while ((char) current == ' ') {
                        current = inputStream.readByte();
                    }
                    blist.add(current);
                }
                if (size != 0) {
                    for (int ii = 0; ii < size; ii++) {
                        int byte1 = inputStream.readByte();
                        byte blah = (byte) byte1;
                        blist.add(blah);
                    }
                    // Two more bytes after the chunk data (presumably the trailing CRLF).
                    blist.add(inputStream.readByte());
                    blist.add(inputStream.readByte());
                } else {
                    // Size 0 marks the last chunk: copy bytes through the next '\n' and stop.
                    // NOTE(review): read() yields -1 at end of stream, which is narrowed to a byte here.
                    byte ch = (byte) inputStream.read();
                    // Collected but not used afterwards within this method.
                    StringBuffer endstuff = new StringBuffer();
                    blist.add(ch);
                    endstuff.append((char) ch);
                    while (ch != '\n') {
                        ch = inputStream.readByte();
                        endstuff.append((char) ch);
                        blist.add(ch);
                    }
                    isDone = true;
                }
            }
        }
        // Drain anything still pending; the loop only ends via EOFException.
        if (inputStream.available() > 0) {
            try {
                while (true) {
                    blist.add(inputStream.readByte());
                }
            } catch (EOFException e) {
                System.out.println(e);
            }
        }
    } catch (IOException ex) {
        // Logged only; processing continues below with the data gathered so far.
        Logger.getLogger(HttpResponse.class.getName()).log(Level.SEVERE, null, ex);
    }
    setBlist(blist);
    setHeader(header);
    if (this.gzip) {
        addContents(unzipData(blist.toArray()));
    } else if (this.deflate) {
        addContents(deflateData(blist.toArray()));
    } else {
        // NOTE(review): content holds what this method appended (header lines plus CRLF), unless
        // append(blist, content) modifies it — confirm the body is meant to be omitted here.
        addContents(content.toString());
    }
}
From source file:RandomStringUtils.java
/** * Creates a random string based on a variety of options, using * supplied source of randomness./*w w w . ja v a2 s.com*/ * * If start and end are both <code>0</code>, start and end are set * to <code>' '</code> and <code>'z'</code>, the ASCII printable * characters, will be used, unless letters and numbers are both * <code>false</code>, in which case, start and end are set to * <code>0</code> and <code>Integer.MAX_VALUE</code>. * * If set is not <code>null</code>, characters between start and * end are chosen. * * This method accepts a user-supplied {@link Random} * instance to use as a source of randomness. By seeding a single * {@link Random} instance with a fixed seed and using it for each call, * the same random sequence of strings can be generated repeatedly * and predictably. * * @param count the length of random string to create * @param start the position in set of chars to start at * @param end the position in set of chars to end before * @param letters only allow letters? * @param numbers only allow numbers? * @param chars the set of chars to choose randoms from. * If <code>null</code>, then it will use the set of all chars. * @param random a source of randomness. * @return the random string * @throws ArrayIndexOutOfBoundsException if there are not * <code>(end - start) + 1</code> characters in the set array. * @throws IllegalArgumentException if <code>count</code> < 0. 
* @since 2.0 */ public static String random(int count, int start, int end, boolean letters, boolean numbers, char[] chars, Random random) { if (count == 0) { return ""; } else if (count < 0) { throw new IllegalArgumentException("Requested random string length " + count + " is less than 0."); } if ((start == 0) && (end == 0)) { end = 'z' + 1; start = ' '; if (!letters && !numbers) { start = 0; end = Integer.MAX_VALUE; } } char[] buffer = new char[count]; int gap = end - start; while (count-- != 0) { char ch; if (chars == null) { ch = (char) (random.nextInt(gap) + start); } else { ch = chars[random.nextInt(gap) + start]; } if ((letters && Character.isLetter(ch)) || (numbers && Character.isDigit(ch)) || (!letters && !numbers)) { if (ch >= 56320 && ch <= 57343) { if (count == 0) { count++; } else { // low surrogate, insert high surrogate after putting it in buffer[count] = ch; count--; buffer[count] = (char) (55296 + random.nextInt(128)); } } else if (ch >= 55296 && ch <= 56191) { if (count == 0) { count++; } else { // high surrogate, insert low surrogate before putting it in buffer[count] = (char) (56320 + random.nextInt(128)); count--; buffer[count] = ch; } } else if (ch >= 56192 && ch <= 56319) { // private high surrogate, no effing clue, so skip it count++; } else { buffer[count] = ch; } } else { count++; } } return new String(buffer); }
From source file:ke.go.moh.oec.mpi.match.NameMatch.java
/** * Clean the string before finding its soundex (or metaphone) value. * This is necessary because the soundex library methods used make * the erroneous assumption that if the java Character.isLetter() * method returns true then the character will be in the range * A-Z or a-z. This is true for the basic 7-bit ASCII codes, but it may not * be for the extended 8-bit ASCII codes. If the character is an accented * letter in the extended 8-bit ASCII range, then isLetter() will return * true but the character will not be in the range A-Z or a-z. * <p>/*from ww w . j a v a2s . co m*/ * To compensate for this problem in the library methods called, this * method strips from the string any characters for which isLetter() * returns true, but the character is not in the range A-Z or a-z. * * @param str String for which the soundex value will be computed. * @return String with accented characters (if any) removed. */ private String clean(String str) { if (str.length() > 0) { int len = str.length(); char[] chars = new char[len]; int count = 0; for (int i = 0; i < len; i++) { char c = Character.toUpperCase(str.charAt(i)); if (!Character.isLetter(c) || (c >= 'A' && c <= 'Z')) { chars[count++] = str.charAt(i); } } if (count != len) { str = new String(chars, 0, count); } } return str; }
From source file:org.apache.nutch.tools.proxy.SegmentHandler.java
@Override public void handle(Request req, HttpServletResponse res, String target, int dispatch) throws IOException, ServletException { try {//w ww .j a v a 2 s . c o m String uri = req.getUri().toString(); LOG.info("URI: " + uri); addMyHeader(res, "URI", uri); Text url = new Text(uri.toString()); CrawlDatum cd = seg.getCrawlDatum(url); if (cd != null) { addMyHeader(res, "Res", "found"); LOG.info("-got " + cd.toString()); ProtocolStatus ps = (ProtocolStatus) cd.getMetaData().get(Nutch.WRITABLE_PROTO_STATUS_KEY); if (ps != null) { Integer TrCode = protoCodes.get(ps.getCode()); if (TrCode != null) { res.setStatus(TrCode.intValue()); } else { res.setStatus(HttpServletResponse.SC_OK); } addMyHeader(res, "ProtocolStatus", ps.toString()); } else { res.setStatus(HttpServletResponse.SC_OK); } Content c = seg.getContent(url); if (c == null) { // missing content req.setHandled(true); res.addHeader("X-Handled-By", getClass().getSimpleName()); return; } byte[] data = c.getContent(); LOG.debug("-data len=" + data.length); Metadata meta = c.getMetadata(); String[] names = meta.names(); LOG.debug("- " + names.length + " meta"); for (int i = 0; i < names.length; i++) { boolean my = true; char ch = names[i].charAt(0); if (Character.isLetter(ch) && Character.isUpperCase(ch)) { // pretty good chance it's a standard header my = false; } String[] values = meta.getValues(names[i]); for (int k = 0; k < values.length; k++) { if (my) { addMyHeader(res, names[i], values[k]); } else { res.addHeader(names[i], values[k]); } } } req.setHandled(true); res.addHeader("X-Handled-By", getClass().getSimpleName()); res.setContentType(meta.get(Metadata.CONTENT_TYPE)); res.setContentLength(data.length); OutputStream os = res.getOutputStream(); os.write(data, 0, data.length); res.flushBuffer(); } else { addMyHeader(res, "Res", "not found"); LOG.info(" -not found " + url); } } catch (Exception e) { e.printStackTrace(); LOG.warn(StringUtils.stringifyException(e)); addMyHeader(res, "Res", "Exception: " + 
StringUtils.stringifyException(e)); } }
From source file:edu.stanford.muse.index.IndexUtils.java
/** replaces all tokens in the given text that are not in any of the entities in the given doc. * all other tokens are replaced with REDACTION_CHAR. * token is defined as a consecutive sequence of letters or digits * Note: all other characters (incl. punctuation, special symbols) are blindly copied through * anything not captured in a token is considered non-sensitive and is passed through *//*from ww w. ja va 2 s . c o m*/ public static String retainOnlyNames(String text, org.apache.lucene.document.Document doc) { StringBuilder result = new StringBuilder(); Set<String> allowedTokens = new LinkedHashSet<>(); // assemble all the allowed tokens (lower cased) from these 3 types of entities { List<String> allEntities = Arrays.asList(Archive.getAllNamesInLuceneDoc(doc, true)).stream() .map(Span::getText).collect(Collectors.toList()); for (String e : allEntities) allowedTokens.addAll(Util.tokenize(e.toLowerCase())); // names may sometimes still have punctuation; strip it. e.g. a name like "Rep. 
Duncan" should lead to the tokens "rep" and "duncan" allowedTokens = allowedTokens.stream().map(s -> Util.stripPunctuation(s)).collect(Collectors.toSet()); } final char REDACTION_CHAR = '.'; int idx = 0; boolean previousTokenAllowed = false; outer: while (true) { StringBuilder token = new StringBuilder(); // go through all the chars one by one, either passing them through or assembling them in a token that can be looked up in allowedTokens { // skip until start of next token, passing through chars to result // the letter pointed to by idx has not yet been processed while (true) { if (idx >= text.length()) break outer; char ch = text.charAt(idx++); if (Character.isLetter(ch) || Character.isDigit(ch)) { // if other chars are judged sensitive in the future, this condition should be updated token.append(ch); break; } else result.append(ch); } } Character ch; { // now, idx is just past the start of a token (with the first letter stored in token), // keep reading letters until we find a non-letter, adding it to the token // the letter pointed to by idx has not yet been processed while (true) { ch = null; if (idx >= text.length()) break; // only break out of inner loop here, not the outer. this might be the last token, and token may have some residual content, so it has to be processed ch = text.charAt(idx++); if (!Character.isLetter(ch) && !Character.isDigit(ch)) break; token.append(ch); } } // ch contains the first char beyond the token (if it is not null). 
If it is null, it means we have reached the end of the string // look up the token and allow it only if allowedTokens contains it // use lower case token for comparison, but when appending to result, use the original string with the original case // worried about "A" grade, we should disallow it although it could easily be a token in a name somewhere String lowerCaseToken = token.toString().toLowerCase(); // ctoken = canonicalized token boolean allowToken = allowedTokens.contains(lowerCaseToken); // however, if this token is a stop word, only allow if previous token was allowed because we don't want to start from a stop word. // note: this will still allow the stop word if it is at the beginning of a sentence, and the prev. sentence ended in an allowed token if (allowToken && DictUtils.isJoinWord(lowerCaseToken)) allowToken = previousTokenAllowed; if (allowToken) result.append(token); else for (int j = 0; j < token.length(); j++) result.append(REDACTION_CHAR); previousTokenAllowed = allowToken; if (ch != null) result.append(ch); } return result.toString(); }