// Strings -- extract printable strings from a binary file
/*
* Copyright (c) Ian F. Darwin, http://www.darwinsys.com/, 1996-2002.
* All rights reserved. Software written by Ian F. Darwin and others.
* $Id: LICENSE,v 1.8 2004/02/09 03:33:38 ian Exp $
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Java, the Duke mascot, and all variants of Sun's Java "steaming coffee
* cup" logo are trademarks of Sun Microsystems. Sun's, and James Gosling's,
* pioneering role in inventing and promulgating (and standardizing) the Java
* language and environment is gratefully acknowledged.
*
* The pioneering role of Dennis Ritchie and Bjarne Stroustrup, of AT&T, for
* inventing predecessor languages C and C++ is also gratefully acknowledged.
*/
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
/**
 * Strings -- extract printable strings from a binary file.
 *
 * <p>Each input is read one byte at a time. Runs of "string characters" (see
 * {@link #isStringChar(char)}), optionally containing embedded spaces, that are
 * at least {@link #minLength} characters long are handed to
 * {@link #report(String, StringBuffer)}.
 *
 * @author Ian F. Darwin, http://www.darwinsys.com/
 * @version $Id: Strings.java,v 1.3 2004/02/08 23:57:29 ian Exp $
 */
public class Strings {
    /** Minimum length a run of characters must reach before it is reported. */
    protected int minLength = 4;

    /**
     * Return true if the character is one of the ASCII characters this program
     * treats as part of a string: letters, digits, and a fixed set of
     * punctuation. Character.isLetterOrDigit() is deliberately not used, because
     * it applies to all Unicode ranges. Space is NOT accepted here; it is handled
     * separately by {@link #process(String, InputStream)}.
     *
     * @param ch the character to classify
     * @return true if ch may appear in a reported string
     */
    protected boolean isStringChar(char ch) {
        if (ch >= 'a' && ch <= 'z') {
            return true;
        }
        if (ch >= 'A' && ch <= 'Z') {
            return true;
        }
        if (ch >= '0' && ch <= '9') {
            return true;
        }
        switch (ch) {
            case '/':
            case '-':
            case ':':
            case '.':
            case ',':
            case '_':
            case '$':
            case '%':
            case '\'':
            case '(':
            case ')':
            case '[':
            case ']':
            case '<':
            case '>':
                return true;
            default:
                return false;
        }
    }

    /**
     * Process one file: scan the stream and report every qualifying string.
     *
     * <p>The stream is always closed before this method returns, even if reading
     * fails. An IOException is not propagated; it is printed to System.err so
     * that it does not get mixed into the strings written by report().
     *
     * @param fileName name used to label each reported string
     * @param inStream the bytes to scan; closed by this method
     */
    protected void process(String fileName, InputStream inStream) {
        // This line alone cuts the runtime by about 66% on large files.
        BufferedInputStream is = new BufferedInputStream(inStream);
        try {
            StringBuffer sb = new StringBuffer();
            int i;
            // Read a byte, cast it to char, check if part of printable string.
            while ((i = is.read()) != -1) {
                char ch = (char) i;
                // A space only continues a string; it never starts one.
                if (isStringChar(ch) || (sb.length() > 0 && ch == ' ')) {
                    sb.append(ch);
                } else {
                    flush(fileName, sb);
                }
            }
            // End of stream also terminates a string: without this, a run that
            // reaches the very end of the input would never be reported.
            flush(fileName, sb);
        } catch (IOException e) {
            System.err.println("IOException: " + e);
        } finally {
            try {
                is.close();
            } catch (IOException e) {
                System.err.println("IOException: " + e);
            }
        }
    }

    /** Report the pending run if it is long enough, then empty the buffer. */
    private void flush(String fileName, StringBuffer sb) {
        if (sb.length() == 0) {
            return;
        }
        if (sb.length() >= minLength) {
            report(fileName, sb);
        }
        sb.setLength(0);
    }

    /**
     * This simple main program looks after filenames and opening files and such
     * like for you. With no arguments it reads standard input; otherwise each
     * argument is treated as a file name. A file that cannot be opened is
     * reported on System.err and the remaining files are still processed.
     *
     * @param av file names to scan
     */
    public static void main(String[] av) {
        Strings o = new Strings();
        if (av.length == 0) {
            o.process("standard input", System.in);
        } else {
            for (int i = 0; i < av.length; i++) {
                try {
                    // process() takes ownership of the stream and closes it.
                    o.process(av[i], new FileInputStream(av[i]));
                } catch (FileNotFoundException e) {
                    System.err.println(e);
                }
            }
        }
    }

    /**
     * Output a match. Made a separate method for use by subclassers.
     *
     * <p>The buffer is reused and cleared by the caller after this method
     * returns, so an override that wants to keep the text must copy it.
     *
     * @param fName the label of the input the string was found in
     * @param theString the string that was found
     */
    protected void report(String fName, StringBuffer theString) {
        System.out.println(fName + ": " + theString);
    }
}
Related examples in the same category
1. | String Region Match Demo | | |
2. | Palindrome | | |
3. | Look for particular sequences in sentences | | |
4. | Java Search String | | |
5. | Java String endsWith | | |
6. | Java String startsWith | | |
7. | Search a substring Anywhere | | |
8. | Starts with, ignore case( regular expressions ) | | |
9. | Ends with, ignore case( regular expressions ) | | |
10. | Anywhere, ignore case( regular expressions ) | | |
11. | Searching a String for a Character or a Substring | | |
12. | Not found returns -1 | | |
13. | If a string contains a specific word | | |
14. | Not found | | |
15. | if a String starts with a digit or uppercase letter | | |
16. | Search a String to find the first index of any character in the given set of characters. | | |
17. | Search a String to find the first index of any character not in the given set of characters. | | |
18. | Searches a String for substrings delimited by a start and end tag, returning all matching substrings in an array. | | |
19. | Helper functions to query a string's end portion. The comparison is case insensitive. | | |
20. | Helper functions to query a string's start portion. The comparison is case insensitive. | | |
21. | Wrapper for arrays of ordered strings. This verifies the arrays and supports efficient lookups. | | |
22. | Returns an index into arra (or -1) where the character is not in the charset byte array. | | |
23. | Returns an int[] array of length segments containing the distribution count of the elements in unsorted int[] array with values between min and max (range). | | |
24. | Returns the next index of a character from the chars string | | |
25. | Finds the first index within a String, handling null. | | |
26. | Finds the last index within a String from a start position, handling null. | | |
27. | Finds the n-th index within a String, handling null. | | |
28. | Case insensitive check if a String ends with a specified suffix. | | |
29. | Case insensitive check if a String starts with a specified prefix. | | |
30. | Case insensitive removal of a substring if it is at the beginning of a source string, otherwise returns the source string. | | |
31. | Case insensitive removal of a substring if it is at the end of a source string, otherwise returns the source string. | | |
32. | Check if a String ends with a specified suffix. | | |
33. | Check if a String starts with a specified prefix. | | |
34. | Determine if a String is contained in a String Collection | | |
35. | Determine if a String is contained in a String Collection, ignoring case | | |
36. | Determine if a String is contained in a String [], ignoring case | | |
37. | Determine if a String is contained in a String [], ignoring case or not as specified | | |
38. | Determine if a String is contained in a String[] | | |
39. | Determines if the specified string contains only Unicode letters or digits as defined by Character#isLetterOrDigit(char) | | |
40. | Determining the validity of various XML names | | |
41. | Return the nth index of the given token occurring in the given string | | |
42. | Find the earliest index of any of a set of potential substrings. | | |
43. | Find the latest index of any of a set of potential substrings. | | |
44. | Fast String Search | | |
45. | Performs continuous matching of a pattern in a given string. | | |
46. | Count match | | |