Convert Encoding
/*
* Copyright (c) 2000 David Flanagan. All rights reserved.
* This code is from the book Java Examples in a Nutshell, 2nd Edition.
* It is provided AS-IS, WITHOUT ANY WARRANTY either expressed or implied.
* You may study, use, and modify it for any non-commercial purpose.
* You may distribute it non-commercially as long as you retain this notice.
* For a commercial use license, or to purchase the book (recommended),
* visit http://www.davidflanagan.com/javaexamples2.
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
/**
 * A command-line program that converts text from one character encoding to
 * another.
 *
 * <p>Options (each takes one value): {@code -from <encoding>},
 * {@code -to <encoding>}, {@code -in <file>}, {@code -out <file>}. A missing
 * file option falls back to standard input/output, and a missing encoding
 * option falls back to the {@code file.encoding} system property.
 */
public class ConvertEncoding {
    /**
     * Parses the command line and runs the conversion.
     *
     * <p>Prints the usage message and exits with status 1 when an option is
     * unknown or lacks its value. Exits with status 1, after reporting the
     * exception on standard error, when the conversion fails.
     *
     * @param args the command-line options described on the class
     */
    public static void main(String[] args) {
        String from = null, to = null;
        String infile = null, outfile = null;
        for (int i = 0; i < args.length; i++) { // Parse command-line arguments.
            if (i == args.length - 1) {
                usage(); // Every option needs a value after it; usage() exits.
            }
            if (args[i].equals("-from")) {
                from = args[++i];
            } else if (args[i].equals("-to")) {
                to = args[++i];
            } else if (args[i].equals("-in")) {
                infile = args[++i];
            } else if (args[i].equals("-out")) {
                outfile = args[++i];
            } else {
                usage();
            }
        }
        try {
            convert(infile, outfile, from, to);
        } catch (Exception e) {
            // Report the cause instead of exiting silently, so the user can
            // tell a bad encoding name from a missing file.
            System.err.println(e);
            System.exit(1);
        }
    }

    /** Prints the usage message to standard error and exits with status 1. */
    public static void usage() {
        System.err.println("Usage: java ConvertEncoding <options>\n"
                + "Options:\n\t-from <encoding>\n\t" + "-to <encoding>\n\t"
                + "-in <file>\n\t-out <file>");
        System.exit(1);
    }

    /**
     * Copies text from {@code infile} to {@code outfile}, decoding it with the
     * {@code from} encoding and re-encoding it with the {@code to} encoding.
     *
     * <p>Both encoding names are checked before any file is opened, so an
     * unsupported name never creates or truncates the output file. The
     * streams are closed whether or not the copy succeeds; note that this
     * includes {@code System.in} / {@code System.out} when they are used.
     *
     * @param infile  path of the file to read, or {@code null} for standard input
     * @param outfile path of the file to write, or {@code null} for standard output
     * @param from    name of the input encoding, or {@code null} to use the
     *                {@code file.encoding} system property
     * @param to      name of the output encoding, or {@code null} to use the
     *                {@code file.encoding} system property
     * @throws UnsupportedEncodingException if either encoding name is not supported
     * @throws IOException if a file cannot be opened, read, or written
     */
    public static void convert(String infile, String outfile, String from,
            String to) throws IOException, UnsupportedEncodingException {
        // Use default encoding if no encoding is specified.
        if (from == null) {
            from = System.getProperty("file.encoding");
        }
        if (to == null) {
            to = System.getProperty("file.encoding");
        }

        // Fail on a bad encoding name before touching the file system.
        checkEncodings(from, to);

        InputStream in = (infile != null) ? new FileInputStream(infile) : System.in;
        try {
            OutputStream out = (outfile != null) ? new FileOutputStream(outfile) : System.out;
            try {
                // The InputStreamReader converts from the input encoding to
                // Unicode, and the OutputStreamWriter converts from Unicode
                // to the output encoding.
                Reader r = new BufferedReader(new InputStreamReader(in, from));
                Writer w = new BufferedWriter(new OutputStreamWriter(out, to));

                char[] buffer = new char[4096];
                int len;
                while ((len = r.read(buffer)) != -1) { // Read a block of input.
                    w.write(buffer, 0, len); // And write it out.
                }
                // Close (not just flush) the writer on success so that the
                // buffered characters and the encoder's final bytes are
                // written; failures here must reach the caller.
                w.close();
            } finally {
                out.close(); // Releases the file even when the copy failed.
            }
        } finally {
            in.close();
        }
    }

    /**
     * Verifies that {@code from} can be used for decoding and {@code to} for
     * encoding by running each on empty data.
     */
    private static void checkEncodings(String from, String to)
            throws UnsupportedEncodingException {
        new String(new byte[0], from);
        "".getBytes(to);
    }
}
Related examples in the same category
1. | Utility class for working with character sets | | |
2. | Utility methods for ASCII character checking. | | |
3. | Reader for UCS-2 and UCS-4 encodings. (i.e., encodings from ISO-10646-UCS-(2|4)). | | |
4. | Conversions between IANA encoding names and Java encoding names, and vice versa. | | |
5. | ASCII character handling functions | | |
6. | This class represents an encoding. | | |
7. | Provides information about encodings. | | |
8. | Codec for the Quoted-Printable section of http://www.ietf.org/rfc/rfc1521.txt (RFC 1521) | | |
9. | ISO 8859-8, ASCII plus Hebrew | | |
10. | TIS-620 does not have the non-breaking space or the C1 controls. | | |
11. | ISO-8859-1; a.k.a. Latin-1 | | |
12. | ISO 8859-2, a.k.a. Latin-2 | | |
13. | ISO 8859-3 | | |
14. | ISO 8859-4, Latin plus the characters needed for Greenlandic, Icelandic, and Lappish. | | |
15. | ISO 8859-9 for Turkish. | | |
16. | ISO-8859-10, for Lithuanian, Estonian, Greenlandic, Icelandic, Inuit, Lappish, and other Northern European languages. | | |
17. | ISO-8859-13, for Latvian and other Baltic languages. | | |
18. | ISO-8859-14, for Gaelic, Welsh, and other Celtic languages. | | |
19. | ISO 8859-15 (Latin-9) for Western Europe. Includes the Euro sign and several uncommon French letters. | | |
20. | ISO 8859-16, Romanian | | |
21. | ASCII Writer | | |
22. | UCS Writer | | |
23. | Unicode Writer | | |
24. | Whether a character is or is not available in a particular encoding | | |
25. | ISO 8859-6, ASCII plus Arabic | | |
26. | ISO 8859-5, ASCII plus Cyrillic (Russian, Byelorussian, etc.) | | |
27. | ISO 8859-7, ASCII plus Greek | | |
28. | IANA to Java Mapping | | |
29. | Java to IANA Mapping | | |
30. | EncodingMap is a convenience class which handles conversions between IANA encoding names and Java encoding names, and vice versa. | | |
31. | Get file encoding | | |