List of usage examples for java.lang.Character.isAlphabetic
public static boolean isAlphabetic(int codePoint)
From source file:org.apache.brooklyn.util.net.Urls.java
/** as {@link #isUrlWithProtocol(String)} but configurable to be strict (false, false) or allow newline chars (if e.g. in an unescaped argument) */ public static boolean isUrlWithProtocol(String x, boolean allowSpacesAfterCharAfterColon, boolean allowMultiline) { if (x == null) return false; for (int i = 0; i < x.length(); i++) { char c = x.charAt(i); if (c == ':') { if (i == 0 || i + 1 >= x.length()) return false; char c2 = x.charAt(i + 1); // never allow a whitespace or quote mark right after the ':', that is too similar to json/yaml! if (Character.isWhitespace(c2) || c2 == '\'' || c2 == '\"') return false; if (!allowMultiline) { if (x.indexOf('\n') >= 0) return false; if (x.indexOf('\r') >= 0) return false; }/* w w w . j a v a2 s .co m*/ if (!allowSpacesAfterCharAfterColon) { if (x.indexOf(' ') >= 0) return false; if (x.indexOf('\t') >= 0) return false; } return true; } // protocol schema as per https://en.wikipedia.org/wiki/Uniform_Resource_Locator if (i == 0) { if (!Character.isLetter(c)) return false; } else if (!Character.isAlphabetic(c) && !Character.isDigit(c) && c != '+' && c != '.' && c != '-') { return false; } } // no colon found return false; }
From source file:org.apache.openmeetings.screenshare.job.OmKeyEvent.java
/**
 * Builds a key event from a map of attributes.
 * <p>
 * The map entries read here are {@code "alt"}, {@code "ctrl"}, {@code "shift"},
 * {@code "char"} and {@code "key"}. The resulting key code is resolved as follows:
 * for ASCII-printable characters a {@code KeyStroke} lookup is tried first, then
 * {@code CHAR_MAP}, then (for non-alphabetic characters only) {@code KEY_MAP};
 * for all other characters only {@code KEY_MAP} is consulted. If no mapping is
 * found the computed/original key code is kept.
 *
 * @param obj attribute map describing the key event
 */
public OmKeyEvent(Map<String, Object> obj) {
    alt = TRUE.equals(obj.get("alt"));
    ctrl = TRUE.equals(obj.get("ctrl"));
    // The character has to be read BEFORE it is inspected: previously 'shift' was
    // computed from the not-yet-assigned field, so the upper-case check never fired.
    ch = (char) getInt(obj, "char");
    shift = TRUE.equals(obj.get("shift")) || isUpperCase(ch);
    key = inKey = getInt(obj, "key");

    Integer mapped = null;
    if (CharUtils.isAsciiPrintable(ch)) {
        boolean alpha = Character.isAlphabetic(ch);
        if (alpha) {
            // can't be combined due to different types
            key = getKeyStroke(toUpperCase(ch), 0).getKeyCode();
        } else {
            key = getKeyStroke(Character.valueOf(ch), 0).getKeyCode();
        }
        if (key == 0) {
            // the key stroke lookup gave no key code, try the character table
            mapped = CHAR_MAP.get(ch);
            if (mapped == null) {
                // fallback
                key = inKey;
            }
        }
        if (!alpha && mapped == null) {
            mapped = KEY_MAP.get(key);
        }
    } else {
        mapped = KEY_MAP.get(key);
    }
    this.key = mapped == null ? key : mapped;
    log.debug("sequence:: shift {}, ch {}, orig {} -> key {}({}), map {}", shift, ch == 0 ? ' ' : ch, inKey,
            key, Integer.toHexString(key), mapped);
}
From source file:org.apdplat.superword.tools.Definition.java
public static List<String> parseDefinitionForWebster(String html, String cssPath) { List<String> list = new ArrayList<>(); try {/* w w w .j a v a2 s. c o m*/ for (Element element : Jsoup.parse(html) .select("div.tense-box.quick-def-box.simple-def-box.card-box.def-text div.inner-box-wrapper")) { StringBuilder definition = new StringBuilder(); String partOfSpeech = element.select("div.word-attributes span.main-attr em").text().trim(); for (Element defElement : element.select( "div.definition-block.def-text ul.definition-list.no-count li p.definition-inner-item span")) { String def = defElement.text().trim(); if (def.length() < 3) { continue; } if (Character.isAlphabetic(def.charAt(0))) { def = ": " + def; } else { int index = 0; while (!Character.isAlphabetic(def.charAt(++index))) { // } def = ": " + def.substring(index); } definition.append(partOfSpeech).append(" ").append(def); list.add(definition.toString()); definition.setLength(0); } } } catch (Exception e) { LOGGER.error("?", e); } return list; }
From source file:org.apdplat.superword.tools.PdfParser.java
private static void addLineToParagraph(String line, String lastLine, String nextLine, StringBuilder paragraph) { if (StringUtils.isBlank(line)) { return;//from w ww .ja v a2s . c o m } if (nextLine != null) { //???? if (Character.isDigit(line.charAt(0)) && Character.isAlphabetic(line.charAt(line.length() - 1)) //? && (StringUtils.isBlank(nextLine) || Character.isDigit(nextLine.charAt(0)) || Character.isUpperCase(nextLine.charAt(0)))) { LOGGER.debug("???" + line); return; } } paragraph.append(line).append(" "); }
From source file:org.apdplat.superword.tools.PdfParser.java
/** * ???// www . java2s. c o m * @param paragraph * @return */ private static List<String> segSentence(String paragraph) { List<String> data = new ArrayList<>(); //?? paragraph = prepareSeg(paragraph); if (StringUtils.isBlank(paragraph)) { return data; } //??? for (String s : paragraph.split("[.]")) { if (StringUtils.isBlank(s)) { continue; } LOGGER.debug("???" + s); s = processSentence(s); if (s == null) { continue; } //?? if (Character.isAlphabetic(s.charAt(s.length() - 1))) { s += "."; } //. s = s.replace("??", "."); data.add(s); LOGGER.debug("??" + s); if (LOGGER.isDebugEnabled()) { int length = s.split("\\s+").length; //?? SENTENCE_LENGTH_INFO.putIfAbsent(length, new AtomicInteger()); SENTENCE_LENGTH_INFO.get(length).incrementAndGet(); } } return data; }
From source file:org.languagetool.rules.spelling.hunspell.HunspellRule.java
/** * @since public since 4.1//from w ww . j a v a 2 s. c om */ @Experimental public boolean isMisspelled(String word) { try { if (needsInit) { init(); } boolean isAlphabetic = true; if (word.length() == 1) { // hunspell dictionaries usually do not contain punctuation isAlphabetic = Character.isAlphabetic(word.charAt(0)); } return (isAlphabetic && !"--".equals(word) && hunspellDict.misspelled(word) && !ignoreWord(word)) || isProhibited(removeTrailingDot(word)); } catch (IOException e) { throw new RuntimeException(e); } }
From source file:org.mongojx.fluent.core.MongoJxParser.java
/**
 * Parses a filter string character by character into {@code Document}s, substituting
 * the supplied parameters at parameter placeholders.
 * <p>
 * The method is a hand-written state machine: {@code state} records the last parsing
 * event, {@code keyStartIndex}/{@code keyStopIndex} delimit the current key inside
 * {@code filter}, and {@code valueStartIndex} marks where the current inline value began.
 * Which concrete characters the {@code isXxx(character)} helpers recognise is not visible
 * here.
 *
 * @param filter     the filter expression to parse
 * @param parameters values bound, in order, to the parameter placeholders in {@code filter}
 * @return a tuple of the index at which parsing stopped and the documents collected
 */
public static QueryTuple bind(String filter, Object... parameters) {
    ParsingEvent state = null;
    int keyStartIndex = 0;
    int keyStopIndex = 0;
    int valueStartIndex = 0;
    // NOTE(review): starts at 0 on every invocation, including the recursive call below,
    // which receives the same parameters array - confirm this is intended.
    int parameterIndex = 0;
    // finalDocument is the top-level document of the object currently being parsed;
    // document is the (possibly nested) document that key/value pairs are written to.
    Document finalDocument = new Document();
    Document document = finalDocument;
    int depth = 0;
    int arrays = 1;
    List<Document> documents = new ArrayList();
    for (int i = 0; i < filter.length(); i++) {
        char character = filter.charAt(i);
        if (isJsonObjectStarting(character)) {
            if (state == KEY_VALUE_DELIMITER) {
                // object used as a value: nest a new document under the current key
                Document nestedDocument = new Document();
                document.put(filter.substring(keyStartIndex, keyStopIndex), nestedDocument);
                document = nestedDocument;
            } else if (state == IN_ARRAY_AND_SEPARATOR) {
                // next object after a separator that followed a completed object: start fresh
                document = new Document();
                finalDocument = document;
            }
            state = START_OBJECT;
            keyStartIndex = i + 1;
            depth++;
        } else if (isDoubleStringCharacter(character)) {
            if (isDoubleStringEnding(state)) {
                // closing quote: store the text between the quotes under the current key
                state = END_DOUBLE_QUOTE_STRING;
                document.put(filter.substring(keyStartIndex, keyStopIndex), filter.substring(valueStartIndex, i));
            } else {
                valueStartIndex = i + 1;
                state = START_DOUBLE_QUOTE_STRING;
            }
        } else if (isSingleStringCharacter(character)) {
            if (isSingleStringEnding(state)) {
                state = END_SINGLE_QUOTE_STRING;
            } else if (isSingleStringStarting(state)) {
                keyStartIndex = i;
                state = START_SINGLE_QUOTE_STRING;
            }
        } else if (isJsonObjectEnding(character)) {
            depth--;
            if (depth == 0) {
                // outermost object closed: it is complete, collect it
                documents.add(finalDocument);
                state = END_OF_IN_ARRAY_OBJECT;
            }
        } else if (isArrayStarting(character)) {
            // arrays starts at 1, so the first such character ends this invocation and
            // reports how many characters were consumed.
            // NOTE(review): going by the helper names, the roles of isArrayStarting and
            // isArrayEnding look swapped relative to what each branch does - confirm.
            arrays--;
            if (arrays == 0) {
                return new QueryTuple(i + 1, documents);
            }
        } else if (isArrayEnding(character)) {
            // parse the remainder recursively, store its documents under the current key
            // and skip over the characters the recursive call consumed
            QueryTuple queryTuple = bind(filter.substring(i + 1), parameters);
            document.put(filter.substring(keyStartIndex, keyStopIndex), queryTuple.getDocuments());
            i = i + queryTuple.getIndex();
        } else if (isKeyValueSeparator(character)) {
            if (state != KEY_END) {
                // key was not already terminated by whitespace: it ends right here
                keyStopIndex = i;
            }
            // register the key with an empty placeholder value; later branches overwrite it
            document.put(filter.substring(keyStartIndex, keyStopIndex), "");
            state = KEY_VALUE_DELIMITER;
        } else if (isParameter(character)) {
            state = PARAMETER;
            // NOTE(review): no bounds check - more placeholders than parameters will throw
            // ArrayIndexOutOfBoundsException.
            document.put(filter.substring(keyStartIndex, keyStopIndex), parameters[parameterIndex]);
            parameterIndex++;
        } else if (isSeparator(character)) {
            keyStartIndex = i + 1;
            if (state == END_OF_IN_ARRAY_OBJECT) {
                state = IN_ARRAY_AND_SEPARATOR;
            } else {
                state = AND_SEPARATOR;
            }
        } else if (Character.isWhitespace(character)) {
            if (state == AND_SEPARATOR) {
                // skip whitespace in front of a key
                keyStartIndex = i + 1;
            } else if (state == START_OBJECT) {
                keyStartIndex = i + 1;
            } else if (state == START_INLINE_NUMERIC_VALUE) {
                // whitespace terminates an inline number: parse and store it
                // NOTE(review): this is the only branch that stores an inline number, so a
                // number followed directly by a non-whitespace character is not stored
                // by this method - confirm this is intended.
                state = END_INLINE_NUMERIC_VALUE;
                String value = filter.substring(valueStartIndex, i);
                document.put(filter.substring(keyStartIndex, keyStopIndex), NumberUtils.createNumber(value));
            } else if (state == KEY_START) {
                // whitespace terminates an unquoted key
                keyStopIndex = i;
                state = KEY_END;
            }
        } else if (Character.isDigit(character) && state == KEY_VALUE_DELIMITER) {
            // first digit after the key/value delimiter starts an inline number
            state = START_INLINE_NUMERIC_VALUE;
            valueStartIndex = i;
        } else if (Character.isAlphabetic(character) && (state == AND_SEPARATOR || state == START_OBJECT)) {
            // first letter after a separator or object start begins an unquoted key
            state = KEY_START;
        }
    }
    return new QueryTuple(filter.length(), documents);
}
From source file:org.ovirt.api.metamodel.analyzer.ModelAnalyzer.java
/** * Creates a document with the given name and, populates it with the content read from the given input stream, and * adds it to the model./*from w ww .ja v a2s . c o m*/ * * @param file the name of file containing the document, including the extension * @param in the input stream that will be used to populate the document * @throws IOException if something fails while reading the content of the document */ private void analyzeDocument(String file, InputStream in) throws IOException { // Create the document: Document document = new Document(); // Remove the extension from the file name: file = FilenameUtils.getBaseName(file); // The name of the document can contain a prefix to explicitly indicate the order of the document relative to // the other documents of the model. This prefix should be separated from the rest of the name using a dash, and // that dash should be ignored. String prefix = null; int index = file.indexOf('-'); if (index > 0) { prefix = file.substring(0, index); file = file.substring(index + 1); } Name name = NameParser.parseUsingCase(file); if (prefix != null && !prefix.isEmpty()) { List<String> words = name.getWords(); words.add(0, prefix); name.setWords(words); if (Character.isAlphabetic(prefix.charAt(0))) { document.setAppendix(true); } } document.setName(name); // Read the source of the document: String source = IOUtils.toString(in, StandardCharsets.UTF_8); document.setSource(source); // Add the document to the model: model.addDocument(document); }
From source file:org.structr.files.text.FulltextTokenizer.java
@Override public void write(final char[] cbuf, final int off, final int len) throws IOException { if (wordCount < wordCountLimit) { final int limit = off + len; final int length = Math.min(limit, cbuf.length); for (int i = off; i < length; i++) { final char c = cbuf[i]; // remove occurrences of more than 10 identical chars in a row if (c == lastCharacter) { if (consecutiveCharCount++ >= 10) { continue; }//from w w w . j av a 2 s . co m } else { consecutiveCharCount = 0; } if (!Character.isAlphabetic(c) && !Character.isDigit(c) && !SpecialChars.contains(c)) { flush(); if (Character.isWhitespace(c)) { rawText.append(c); } else { rawText.append(" "); } } else { wordBuffer.append(c); rawText.append(c); } lastCharacter = c; } } }
From source file:org.structr.text.FulltextIndexerModule.java
@Override public GraphObjectMap getContextObject(final String searchTerm, final String text, final int contextLength) { final GraphObjectMap contextObject = new GraphObjectMap(); final Set<String> contextValues = new LinkedHashSet<>(); final String[] searchParts = searchTerm.split("[\\s,;]+"); final GenericProperty contextKey = new GenericProperty("context"); for (final String searchString : searchParts) { final String lowerCaseSearchString = searchString.toLowerCase(); final String lowerCaseText = text.toLowerCase(); final StringBuilder wordBuffer = new StringBuilder(); final StringBuilder lineBuffer = new StringBuilder(); final int textLength = text.length(); /*/*from w ww .ja va 2 s . co m*/ * we take an average word length of 8 characters, multiply * it by the desired prefix and suffix word count, add 20% * and try to extract up to prefixLength words. */ // modify these parameters to tune prefix and suffix word extraction // loop variables int newlineCount = 0; int wordCount = 0; // wordCount starts at 1 because we include the matching word int pos = -1; do { // find next occurrence pos = lowerCaseText.indexOf(lowerCaseSearchString, pos + 1); if (pos > 0) { lineBuffer.setLength(0); wordBuffer.setLength(0); wordCount = 0; newlineCount = 0; // fetch context words before search hit for (int i = pos; i >= 0; i--) { final char c = text.charAt(i); if (!Character.isAlphabetic(c) && !Character.isDigit(c) && !FulltextTokenizer.SpecialChars.contains(c)) { wordCount += flushWordBuffer(lineBuffer, wordBuffer, true); // store character in buffer wordBuffer.insert(0, c); if (c == '\n') { // increase newline count newlineCount++; } else { // reset newline count newlineCount = 0; } // paragraph boundary reached if (newlineCount > 1) { break; } // stop if we collected half of the desired word count if (wordCount > contextLength / 2) { break; } } else { // store character in buffer wordBuffer.insert(0, c); // reset newline count newlineCount = 0; } } wordCount += 
flushWordBuffer(lineBuffer, wordBuffer, true); wordBuffer.setLength(0); // fetch context words after search hit for (int i = pos + 1; i < textLength; i++) { final char c = text.charAt(i); if (!Character.isAlphabetic(c) && !Character.isDigit(c) && !FulltextTokenizer.SpecialChars.contains(c)) { wordCount += flushWordBuffer(lineBuffer, wordBuffer, false); // store character in buffer wordBuffer.append(c); if (c == '\n') { // increase newline count newlineCount++; } else { // reset newline count newlineCount = 0; } // paragraph boundary reached if (newlineCount > 1) { break; } // stop if we collected enough words if (wordCount > contextLength) { break; } } else { // store character in buffer wordBuffer.append(c); // reset newline count newlineCount = 0; } } wordCount += flushWordBuffer(lineBuffer, wordBuffer, false); // replace single newlines with space contextValues.add(lineBuffer.toString().trim()); } } while (pos >= 0); } contextObject.put(contextKey, contextValues); return contextObject; }