Fast String Search
/*
* @(#)$Id$
*
* eaio: StringSearch - high-performance pattern matching algorithms in Java
* Copyright (c) 2003, 2004 Johann Burkard (jb@eaio.com) http://eaio.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* --
* Copyright 2006-2008 Makoto YUI
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
* Makoto YUI - porting
*/
//package xbird.util.string;
/**
 * Static helpers that locate a byte pattern inside a byte array using
 * Sunday's "Quick Search" variant of the Boyer-Moore algorithm.
 *
 * @author Makoto YUI (yuin405+xbird@gmail.com)
 */
public final class FastStringSearch {

    /**
     * Searches the whole of {@code text} for the first occurrence of
     * {@code pattern}.
     *
     * @param text    the bytes to search in
     * @param pattern the bytes to search for
     * @return the index in {@code text} at which the first match starts,
     *         or {@code -1} if there is no match
     */
    public static int quickSearch(final byte[] text, final byte[] pattern) {
        final int wholeLength = text.length;
        return quickSearch(text, 0, wholeLength, pattern);
    }

    /**
     * Searches the window {@code [textStart, textEnd)} of {@code text} for the
     * first occurrence of {@code pattern}. A match is only reported when it
     * lies entirely inside that window.
     * <p>
     * This is Sunday's simplified "Quick Finder" version of the Boyer-Moore
     * algorithm; see "A very fast substring search algorithm",
     * <em>Communications of the ACM. 33 (8):132-142</em>.
     * Original implementation by
     * <a href="mailto:jb@eaio.com">Johann Burkard</a>.
     * <pre>
     * Preprocessing: O(m + alphabet size) time
     * Processing   : O(mn) worst case
     * </pre>
     *
     * @param text      the bytes to search in
     * @param textStart index of the first byte of the search window
     * @param textEnd   index one past the last byte of the search window
     * @param pattern   the bytes to search for
     * @return the index in {@code text} at which the first match starts,
     *         or {@code -1} if the window contains no match
     */
    public static int quickSearch(final byte[] text, final int textStart, final int textEnd, final byte[] pattern) {
        final int[] shiftTable = processBytes(pattern);
        final int m = pattern.length;

        for (int pos = textStart; pos + m <= textEnd; ) {
            // Compare the pattern against the current alignment, left to right.
            int matched = 0;
            while (matched < m) {
                if (pattern[matched] != text[pos + matched]) {
                    break;
                }
                matched++;
            }
            if (matched == m) {
                return pos;
            }

            // The shift is chosen from the byte just past the current
            // alignment; if that byte is outside the window, no further
            // alignment can fit either.
            final int lookAhead = pos + m;
            if (lookAhead >= textEnd) {
                break;
            }
            pos += shiftTable[index(text[lookAhead])];
        }
        return -1;
    }

    /**
     * Builds the 256-entry shift table for {@code pattern}. Entry {@code b}
     * holds the pattern length minus the index of the last occurrence of byte
     * value {@code b} in the pattern, or the pattern length plus one when
     * {@code b} does not occur in it.
     */
    private static int[] processBytes(final byte[] pattern) {
        final int m = pattern.length;
        final int[] table = new int[256];

        // Bytes absent from the pattern let the search jump past the look-ahead byte.
        int slot = table.length;
        while (slot-- > 0) {
            table[slot] = m + 1;
        }

        // Ascending order matters: a later occurrence must overwrite an earlier one.
        for (int pos = 0, shift = m; pos < m; pos++, shift--) {
            table[index(pattern[pos])] = shift;
        }
        return table;
    }

    /**
     * Maps a signed {@code byte} to its unsigned value in the range 0..255 so
     * it can be used as an array index.
     */
    private static int index(final byte idx) {
        return idx & 0xFF;
    }
}
Related examples in the same category
1. | String Region Match Demo | | |
2. | Palindrome | | |
3. | Look for particular sequences in sentences | | |
4. | Strings -- extract printable strings from binary file | | |
5. | Java Search String | | |
6. | Java String endsWith | | |
7. | Java String startsWith | | |
8. | Search a substring Anywhere | | |
9. | Starts with, ignore case( regular expressions ) | | |
10. | Ends with, ignore case( regular expressions ) | | |
11. | Anywhere, ignore case( regular expressions ) | | |
12. | Searching a String for a Character or a Substring | | |
13. | Not found returns -1 | | |
14. | If a string contains a specific word | | |
15. | Not found | | |
16. | if a String starts with a digit or uppercase letter | | |
17. | Search a String to find the first index of any character in the given set of characters. | | |
18. | Search a String to find the first index of any character not in the given set of characters. | | |
19. | Searches a String for substrings delimited by a start and end tag, returning all matching substrings in an array. | | |
20. | Helper functions to query a strings end portion. The comparison is case insensitive. | | |
21. | Helper functions to query a strings start portion. The comparison is case insensitive. | | |
22. | Wrapper for arrays of ordered strings. This verifies the arrays and supports efficient lookups. | | |
23. | Returns an index into array (or -1) where the character is not in the charset byte array. | | |
24. | Returns an int[] array of length segments containing the distribution count of the elements in unsorted int[] array with values between min and max (range). | | |
25. | Returns the next index of a character from the chars string | | |
26. | Finds the first index within a String, handling null. | | |
27. | Finds the last index within a String from a start position, handling null. | | |
28. | Finds the n-th index within a String, handling null. | | |
29. | Case insensitive check if a String ends with a specified suffix. | | |
30. | Case insensitive check if a String starts with a specified prefix. | | |
31. | Case insensitive removal of a substring if it is at the beginning of a source string, otherwise returns the source string. | | |
32. | Case insensitive removal of a substring if it is at the end of a source string, otherwise returns the source string. | | |
33. | Check if a String ends with a specified suffix. | | |
34. | Check if a String starts with a specified prefix. | | |
35. | Determine if a String is contained in a String Collection | | |
36. | Determine if a String is contained in a String Collection, ignoring case | | |
37. | Determine if a String is contained in a String [], ignoring case | | |
38. | Determine if a String is contained in a String [], ignoring case or not as specified | | |
39. | Determine if a String is contained in a String[] | | |
40. | Determines if the specified string contains only Unicode letters or digits as defined by Character#isLetterOrDigit(char) | | |
41. | Determining the validity of various XML names | | |
42. | Return the nth index of the given token occurring in the given string | | |
43. | Find the earliest index of any of a set of potential substrings. | | |
44. | Find the latest index of any of a set of potential substrings. | | |
45. | Performs continuous matching of a pattern in a given string. | | |
46. | Count match | | |