List of usage examples for java.lang.CharSequence.subSequence
/**
 * Returns a {@code CharSequence} that is a subsequence of this sequence.
 *
 * <p>The subsequence starts with the {@code char} at index {@code start} and
 * ends with the {@code char} at index {@code end - 1}, so its length is
 * {@code end - start}.
 *
 * @param start the start index, inclusive
 * @param end the end index, exclusive
 * @return the specified subsequence
 * @throws IndexOutOfBoundsException if {@code start} or {@code end} is negative,
 *         if {@code end} is greater than {@code length()}, or if {@code start}
 *         is greater than {@code end}
 */
CharSequence subSequence(int start, int end);
From source file:dk.netarkivet.harvester.harvesting.extractor.IcelandicExtractorJS.java
public long considerStrings(Extractor ext, CrawlURI curi, CharSequence cs, boolean handlingJSFile) { long foundLinks = 0; Matcher strings = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs); int startIndex = 0; while (strings.find(startIndex)) { CharSequence subsequence = cs.subSequence(strings.start(2), strings.end(2)); Matcher uri = TextUtils.getMatcher(STRING_URI_DETECTOR, subsequence); if (uri.matches()) { String string = uri.group(); boolean falsePositive = false; try { string = StringEscapeUtils.unescapeJavaScript(string); } catch (NestableRuntimeException e) { LOGGER.log(Level.WARNING, "problem unescaping some javascript", e); }/*from w w w. ja v a 2 s . c o m*/ string = UriUtils.speculativeFixup(string, curi.getUURI()); // Filter out some bad false positives (should really fix regexp for URI detection) if (string.contains("/.") || string.contains("@") || string.length() > 150) { // While legal in URIs, these are rare and usually an indication of a false positive // in the speculative extraction. falsePositive = true; } if (!falsePositive) { falsePositive = shouldIgnorePossibleRelativeLink(string); } if (falsePositive) { foundFalsePositives++; } else { foundLinks++; try { int max = ext.getExtractorParameters().getMaxOutlinks(); if (handlingJSFile) { addRelativeToVia(curi, max, string, JS_MISC, SPECULATIVE); } else { addRelativeToBase(curi, max, string, JS_MISC, SPECULATIVE); } } catch (URIException e) { ext.logUriError(e, curi.getUURI(), string); } } } else { foundLinks += considerStrings(ext, curi, subsequence, handlingJSFile); } // reconsider the last closing quote as possible opening quote startIndex = strings.end(2); } TextUtils.recycleMatcher(strings); return foundLinks; }
From source file:org.abstractmeta.toolbox.compilation.compiler.impl.JavaSourceCompilerImpl.java
/**
 * Appends a description of {@code diagnostic} to {@code diagnosticBuilder}: the
 * diagnostic message, a newline, and an excerpt of the source code around the
 * reported position (empty when the diagnostic has no source).
 *
 * <p>The excerpt spans from 10 characters before the start position to 10
 * characters after the end position, limited to the bounds of the source text.
 *
 * @param diagnostic the compiler diagnostic to describe; a non-null source is
 *        cast to {@code JavaSourceFileObject}
 * @param diagnosticBuilder receives the formatted text
 * @param registry not used by this method
 * @return {@code true} if the diagnostic kind is {@code Diagnostic.Kind.ERROR}
 */
protected boolean buildDiagnosticMessage(Diagnostic diagnostic, StringBuilder diagnosticBuilder,
        JavaFileObjectRegistry registry) {
    Object origin = diagnostic.getSource();
    String excerpt = "";
    if (origin != null) {
        CharSequence code = JavaSourceFileObject.class.cast(origin).getCharContent(true);
        int from = Math.max((int) diagnostic.getStartPosition() - 10, 0);
        int to = Math.min(code.length(), (int) diagnostic.getEndPosition() + 10);
        excerpt = code.subSequence(from, to).toString();
    }
    diagnosticBuilder
            .append(diagnostic.getMessage(null))
            .append("\n")
            .append(excerpt);
    return diagnostic.getKind().equals(Diagnostic.Kind.ERROR);
}
From source file:org.archive.extractor.RegexHTMLLinkExtractor.java
/** * @param sequence/* w w w.ja va2 s .c o m*/ * @param endOfOpenTag */ protected void processStyle(CharSequence sequence, int endOfOpenTag) { // First, get attributes of script-open tag as per any other tag. processGeneralTag(sequence.subSequence(0, 6), sequence.subSequence(0, endOfOpenTag)); // then, parse for URIs RegexCSSLinkExtractor.extract(sequence.subSequence(endOfOpenTag, sequence.length()), source, base, next, extractErrorListener); }
From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java
/** * @param sequence//from ww w . jav a2 s .co m * @param endOfOpenTag */ protected void processStyle(CharSequence sequence, int endOfOpenTag) { // First, get attributes of script-open tag as per any other tag. processGeneralTag(sequence.subSequence(0, 6), sequence.subSequence(0, endOfOpenTag)); // then, parse for URIs RegexpCSSLinkExtractor.extract(sequence.subSequence(endOfOpenTag, sequence.length()), source, base, next, extractErrorListener); }
From source file:com.cyberway.issue.extractor.RegexpHTMLLinkExtractor.java
protected void processScript(CharSequence sequence, int endOfOpenTag) { // first, get attributes of script-open tag // as per any other tag processGeneralTag(sequence.subSequence(0, 6), sequence.subSequence(0, endOfOpenTag)); // then, apply best-effort string-analysis heuristics // against any code present (false positives are OK) processScriptCode(sequence.subSequence(endOfOpenTag, sequence.length())); }
From source file:br.msf.commons.util.CharSequenceUtils.java
/**
 * Returns the leading part of {@code sequence} that ends just before the index
 * reported by {@code indexOf(toSearch, sequence, caseSensitive)} (presumably the
 * first occurrence of {@code toSearch}).
 *
 * @param toSearch the text to look for
 * @param sequence the text to search in and to cut
 * @param caseSensitive passed through unchanged to {@code indexOf}
 * @return the leading part as a string, or {@code null} when the reported
 *         index is negative
 */
public static String subStringBeforeFirst(final CharSequence toSearch, final CharSequence sequence,
        final Boolean caseSensitive) {
    final int cut = indexOf(toSearch, sequence, caseSensitive);
    if (cut < 0) {
        return null;
    }
    return CharSequenceUtils.castToString(sequence.subSequence(0, cut));
}
From source file:br.msf.commons.util.CharSequenceUtils.java
/**
 * Returns the leading part of {@code sequence} that ends just before the index
 * reported by {@code indexOf(toSearch, sequence, caseSensitive)} (presumably the
 * first occurrence of {@code toSearch}).
 *
 * @param toSearch the text to look for
 * @param sequence the text to search in and to cut
 * @param caseSensitive passed through unchanged to {@code indexOf}
 * @return the leading part as a string, or {@code null} when the reported
 *         index is negative
 */
public static String substringBeforeFirst(final CharSequence toSearch, final CharSequence sequence,
        final Boolean caseSensitive) {
    final int cut = indexOf(toSearch, sequence, caseSensitive);
    if (cut < 0) {
        return null;
    }
    return CharSequenceUtils.castToString(sequence.subSequence(0, cut));
}
From source file:br.msf.commons.util.CharSequenceUtils.java
/**
 * Returns the leading part of {@code sequence} that ends just before the index
 * reported by {@code lastIndexOf(toSearch, sequence, caseSensitive)} (presumably
 * the last occurrence of {@code toSearch}).
 *
 * @param toSearch the text to look for
 * @param sequence the text to search in and to cut
 * @param caseSensitive passed through unchanged to {@code lastIndexOf}
 * @return the leading part as a string, or {@code null} when the reported
 *         index is negative
 */
public static String subStringBeforeLast(final CharSequence toSearch, final CharSequence sequence,
        final Boolean caseSensitive) {
    final int cut = lastIndexOf(toSearch, sequence, caseSensitive);
    if (cut < 0) {
        return null;
    }
    return CharSequenceUtils.castToString(sequence.subSequence(0, cut));
}
From source file:br.msf.commons.util.CharSequenceUtils.java
/**
 * Returns the leading part of {@code sequence} that ends just before the index
 * reported by {@code lastIndexOf(toSearch, sequence, caseSensitive)} (presumably
 * the last occurrence of {@code toSearch}).
 *
 * @param toSearch the text to look for
 * @param sequence the text to search in and to cut
 * @param caseSensitive passed through unchanged to {@code lastIndexOf}
 * @return the leading part as a string, or {@code null} when the reported
 *         index is negative
 */
public static String substringBeforeLast(final CharSequence toSearch, final CharSequence sequence,
        final Boolean caseSensitive) {
    final int cut = lastIndexOf(toSearch, sequence, caseSensitive);
    if (cut < 0) {
        return null;
    }
    return CharSequenceUtils.castToString(sequence.subSequence(0, cut));
}
From source file:com.pidoco.juri.JURI.java
/**
 * Joins two raw path fragments so that exactly one {@code "/"} separates them
 * whenever {@code left} is non-empty. An empty {@code left} leaves
 * {@code right} untouched.
 *
 * <pre>
 * concatRawPaths("", "")   -> ""
 * concatRawPaths("/", "")  -> "/"
 * concatRawPaths("", "/")  -> "/"
 * concatRawPaths("a", "")  -> "a/"
 * concatRawPaths("a", "b") -> "a/b"
 * concatRawPaths("/", "/") -> "/"
 * </pre>
 *
 * @param left the leading fragment
 * @param right the trailing fragment
 * @return the concatenated path
 */
public static String concatRawPaths(CharSequence left, CharSequence right) {
    boolean rightStartsWithSlash = StringUtils.startsWith(right, "/");
    boolean insertSlash = false;
    int skip = 0;

    if (left.length() > 0) {
        boolean leftEndsWithSlash = StringUtils.endsWith(left, "/");
        if (leftEndsWithSlash && rightStartsWithSlash) {
            // Both sides bring a slash: drop the one on the right.
            skip = 1;
        } else if (!leftEndsWithSlash && !rightStartsWithSlash) {
            // Neither side brings a slash: supply one.
            insertSlash = true;
        }
    }

    return left + (insertSlash ? "/" : "") + right.subSequence(skip, right.length());
}