Java tutorial
package com.fasterxml.jackson.dataformat.csv;

import java.util.*;

import com.fasterxml.jackson.core.FormatSchema;

/**
 * Simple {@link FormatSchema} sub-type that defines properties of
 * a CSV document to read or write.
 * Properties supported currently are:
 *<ul>
 * <li>columns (List of ColumnDef) [default: empty List]: Ordered list of columns (which may be empty, see below).
 *   Each column has name (mandatory) as well as type (optional; if not
 *   defined, defaults to "String").
 *  </li>
 * <li>useHeader (boolean) [default: false]: whether the first line of physical document defines
 *    column names (true) or not (false): if enabled, parser will take
 *    first-line values to define column names; and generator will output
 *    column names as the first line
 *  </li>
 * <li>quoteChar (char) [default: double-quote ('")]: character used for quoting values
 *   that contain quote characters or linefeeds.
 *  </li>
 * <li>columnSeparator (char) [default: comma (',')]: character used to separate values.
 *     Other commonly used values include tab ('\t') and pipe ('|')
 *  </li>
 * <li>lineSeparator (String) [default: "\n"]: character used to separate data rows.
 *    Only used by generator; parser accepts three standard linefeeds ("\r", "\r\n", "\n").
 *  </li>
 * <li>escapeChar (int) [default: -1 meaning "none"]: character, if any, used to
 *   escape values. Most commonly defined as backslash ('\'). Only used by parser;
 *   generator only uses quoting, including doubling up of quotes to indicate quote char
 *   itself.
 *  </li>
 * <li>skipFirstDataRow (boolean) [default: false]: whether the first data line (either
 *    first line of the document, if useHeader=false, or second, if useHeader=true)
 *    should be completely ignored by parser. Needed to support CSV-like file formats
 *    that include additional non-data content before real data begins (specifically
 *    some database dumps do this)
 *  </li>
 * </ul>
 *<p>
 * Note that schemas without any columns are legal, but if no columns
 * are added, behavior of parser/generator is usually different and
 * content will be exposed as logical Arrays instead of Objects.
 *<p>
 * There are 4 ways to create <code>CsvSchema</code> instances:
 *<ul>
 * <li>Manually build one, using {@link Builder}
 *  </li>
 * <li>Modify existing schema (using <code>withXxx</code> methods
 *    or {@link #rebuild} for creating {@link Builder})
 *  </li>
 * <li>Create schema based on a POJO definition (Class), using
 *    {@link CsvMapper} methods like {@link CsvMapper#schemaFor(java.lang.Class)}.
 *  </li>
 * <li>Request that {@link CsvParser} reads schema from the first line:
 *    enable "useHeader" property for the initial schema, and let parser
 *    read column names from the document itself.
 *  </li>
 *</ul>
 *
 * @since 1.9
 */
public class CsvSchema
    implements FormatSchema, Iterable<CsvSchema.Column>
{
    /*
    /**********************************************************************
    /* Constants
    /**********************************************************************
     */

    /** Shared empty column array used by schemas that define no columns. */
    protected final static Column[] NO_COLUMNS = new Column[0];

    public final static char DEFAULT_COLUMN_SEPARATOR = ',';

    public final static char DEFAULT_QUOTE_CHAR = '"';

    /**
     * By default, no escape character is used -- this is denoted by
     * int value that does not map to a valid character
     */
    public final static int DEFAULT_ESCAPE_CHAR = -1;

    public final static char[] DEFAULT_LINEFEED = "\n".toCharArray();

    /**
     * By default we do NOT expect the first line to be header.
     */
    public final static boolean DEFAULT_USE_HEADER = false;

    public final static boolean DEFAULT_SKIP_FIRST_DATA_ROW = false;

    /*
    /**********************************************************************
    /* Helper classes
    /**********************************************************************
     */

    /**
     * Enumeration that defines optional type indicators that can be passed
     * with schema. If specified, the type is used to determine type of
     * {@link com.fasterxml.jackson.core.JsonToken}
     * that column values are exposed as.
     */
    public enum ColumnType
    {
        /**
         * Default type if not explicitly defined; value will
         * be presented as <code>VALUE_STRING</code> by parser,
         * that is, no type-inference is performed, and value is
         * not trimmed.
         */
        STRING,

        /**
         * Value is considered to be a String, except that tokens
         * "null", "true" and "false" are recognized as matching
         * tokens and reported as such;
         * and values are trimmed (leading/trailing white space)
         */
        STRING_OR_LITERAL,

        /**
         * Value should be a number, but literals "null", "true" and "false"
         * are also understood, and an empty String is considered null.
         * Values are also trimmed (leading/trailing white space)
         * Other non-numeric Strings will cause parsing exception.
         */
        NUMBER,

        /**
         * Value is taken to be a number (if it matches valid JSON number
         * formatting rules), literal (null, true or false) or String,
         * depending on best match.
         * Values are also trimmed (leading/trailing white space)
         */
        NUMBER_OR_STRING
        ;
    }

    /**
     * Definition of a single column: its index, name and value type.
     * Instances are immutable; the <code>withXxx</code> methods return
     * new instances.
     */
    public static class Column
    {
        private final String _name;
        private final int _index;
        private final ColumnType _type;

        /**
         * Constructs a column of type {@link ColumnType#STRING}.
         *
         * @param index Index of the column
         * @param name Name of the column
         */
        public Column(int index, String name) {
            this(index, name, ColumnType.STRING);
        }

        /**
         * @param index Index of the column
         * @param name Name of the column
         * @param type Type indicator for values of the column
         */
        public Column(int index, String name, ColumnType type)
        {
            _index = index;
            _name = name;
            _type = type;
        }

        /**
         * @return New column with same index and type as this one, but given name
         */
        public Column withName(String newName) {
            return new Column(_index, newName, _type);
        }

        /**
         * @return New column with same index and name as this one, but given type
         */
        public Column withType(ColumnType newType) {
            return new Column(_index, _name, newType);
        }

        public int getIndex() { return _index; }
        public String getName() { return _name; }
        public ColumnType getType() { return _type; }
    }

    /**
     * Class used for building {@link CsvSchema} instances.
     */
    public static class Builder
    {
        protected final ArrayList<Column> _columns = new ArrayList<Column>();

        protected boolean _useHeader = DEFAULT_USE_HEADER;

        protected boolean _skipFirstDataRow = DEFAULT_SKIP_FIRST_DATA_ROW;

        protected char _columnSeparator = DEFAULT_COLUMN_SEPARATOR;

        protected char _quoteChar = DEFAULT_QUOTE_CHAR;

        // note: need to use int to allow -1 for 'none'
        // (must default to "no escape char", NOT to the quote character)
        protected int _escapeChar = DEFAULT_ESCAPE_CHAR;

        protected char[] _lineSeparator = DEFAULT_LINEFEED;

        public Builder() { }

        /**
         * "Copy" constructor which creates builder that has settings of
         * given source schema
         */
        public Builder(CsvSchema src)
        {
            for (Column col : src._columns) {
                _columns.add(col);
            }
            _useHeader = src._useHeader;
            _columnSeparator = src._columnSeparator;
            _quoteChar = src._quoteChar;
            _escapeChar = src._escapeChar;
            _lineSeparator = src._lineSeparator;
            _skipFirstDataRow = src._skipFirstDataRow;
        }

        /**
         * Appends a column of type {@link ColumnType#STRING}; its index is
         * the number of columns added so far.
         */
        public Builder addColumn(String name) {
            int index = _columns.size();
            return addColumn(new Column(index, name));
        }

        /**
         * Appends a column of given type; its index is
         * the number of columns added so far.
         */
        public Builder addColumn(String name, ColumnType type) {
            int index = _columns.size();
            return addColumn(new Column(index, name, type));
        }

        /**
         * Appends given column as is; the index stored in the column
         * is not checked against its position in this builder.
         */
        public Builder addColumn(Column c) {
            _columns.add(c);
            return this;
        }

        /**
         * Replaces the column at given position.
         *
         * @throws IllegalArgumentException if index is out of range
         */
        public void replaceColumn(int index, Column c) {
            _checkIndex(index);
            _columns.set(index, c);
        }

        /**
         * Renames the column at given position, keeping its index and type.
         *
         * @throws IllegalArgumentException if index is out of range
         */
        public void renameColumn(int index, String newName) {
            _checkIndex(index);
            _columns.set(index, _columns.get(index).withName(newName));
        }

        /**
         * Changes type of the column at given position, keeping its index and name.
         *
         * @throws IllegalArgumentException if index is out of range
         */
        public void setColumnType(int index, ColumnType type) {
            _checkIndex(index);
            _columns.set(index, _columns.get(index).withType(type));
        }

        public Builder clearColumns() {
            _columns.clear();
            return this;
        }

        /**
         * @return Number of columns added so far
         */
        public int size() {
            return _columns.size();
        }

        public Iterator<Column> getColumns() {
            return _columns.iterator();
        }

        /**
         * Method for specifying whether Schema should indicate that
         * a header line (first row that contains column names) is to be
         * used for reading and writing or not.
         */
        public Builder setUseHeader(boolean b) {
            _useHeader = b;
            return this;
        }

        public Builder setSkipFirstDataRow(boolean b) {
            _skipFirstDataRow = b;
            return this;
        }

        /**
         * Method for specifying character used to separate column
         * values.
         * Default is comma (',').
         */
        public Builder setColumnSeparator(char c) {
            _columnSeparator = c;
            return this;
        }

        /**
         * Method for specifying character used for optional quoting
         * of values.
         * Default is double-quote ('"').
         */
        public Builder setQuoteChar(char c) {
            _quoteChar = c;
            return this;
        }

        /**
         * Method for specifying character used for optional escaping
         * of characters in quoted String values.
         * Default is "not used", meaning that no escaping used.
         */
        public Builder setEscapeChar(char c) {
            _escapeChar = c;
            return this;
        }

        /**
         * Method for specifying that no escape character is to be used
         * with CSV documents this schema defines.
         */
        public Builder disableEscapeChar() {
            _escapeChar = -1;
            return this;
        }

        public Builder setLineSeparator(String lf) {
            _lineSeparator = lf.toCharArray();
            return this;
        }

        public Builder setLineSeparator(char lf) {
            _lineSeparator = new char[] { lf };
            return this;
        }

        /**
         * Constructs a schema from the current settings of this builder.
         */
        public CsvSchema build()
        {
            Column[] cols = _columns.toArray(new Column[_columns.size()]);
            return new CsvSchema(cols, _useHeader, _skipFirstDataRow,
                    _columnSeparator, _quoteChar, _escapeChar, _lineSeparator);
        }

        protected void _checkIndex(int index) {
            if (index < 0 || index >= _columns.size()) {
                throw new IllegalArgumentException("Illegal index " + index
                        + "; only got " + _columns.size() + " columns");
            }
        }
    }

    /*
    /**********************************************************************
    /* Configuration, construction
    /**********************************************************************
     */

    /**
     * Column definitions, needed for optional header and/or mapping
     * of field names to column positions.
     */
    protected final Column[] _columns;

    /** Lookup of column definitions by column name. */
    protected final Map<String, Column> _columnsByName;

    protected final boolean _useHeader;

    protected final boolean _skipFirstDataRow;

    protected final char _columnSeparator;

    protected final char _quoteChar;

    /** Escape character, or -1 to indicate that none is used. */
    protected final int _escapeChar;

    protected final char[] _lineSeparator;

    /**
     * Constructs a schema and builds the by-name column lookup.
     * A null <code>columns</code> argument is treated as "no columns".
     * If multiple columns share a name, the last one is the one found
     * by {@link #column(String)}.
     */
    public CsvSchema(Column[] columns,
            boolean useHeader, boolean skipFirstDataRow,
            char columnSeparator, char quoteChar, int escapeChar,
            char[] lineSeparator)
    {
        if (columns == null) {
            columns = NO_COLUMNS;
        }
        _columns = columns;
        _useHeader = useHeader;
        _skipFirstDataRow = skipFirstDataRow;
        _columnSeparator = columnSeparator;
        _quoteChar = quoteChar;
        _escapeChar = escapeChar;
        _lineSeparator = lineSeparator;

        // and then we may need to create a mapping
        if (_columns.length == 0) {
            _columnsByName = Collections.emptyMap();
        } else {
            _columnsByName = new HashMap<String, Column>(4 + _columns.length);
            for (Column c : _columns) {
                _columnsByName.put(c.getName(), c);
            }
        }
    }

    /**
     * Copy constructor used for creating variants using
     * <code>withXxx()</code> methods. Arguments are used as is; caller
     * is responsible for passing a by-name lookup that matches the columns.
     */
    protected CsvSchema(Column[] columns,
            boolean useHeader, boolean skipFirstDataRow,
            char columnSeparator, char quoteChar, int escapeChar,
            char[] lineSeparator,
            Map<String, Column> columnsByName)
    {
        _columns = columns;
        _useHeader = useHeader;
        _skipFirstDataRow = skipFirstDataRow;
        _columnSeparator = columnSeparator;
        _quoteChar = quoteChar;
        _escapeChar = escapeChar;
        _lineSeparator = lineSeparator;
        _columnsByName = columnsByName;
    }

    public static Builder builder() {
        return new Builder();
    }

    /**
     * Accessor for creating a "default" CSV schema instance, with following
     * settings:
     *<ul>
     * <li>Does NOT use header line
     *  </li>
     * <li>Uses double quotes ('"') for quoting of field values (if necessary)
     *  </li>
     * <li>Uses comma (',') as the field separator
     *  </li>
     * <li>Uses Unix linefeed ('\n') as row separator
     *  </li>
     * <li>Does NOT use any escape characters
     *  </li>
     * <li>Does NOT have any columns defined
     *  </li>
     * </ul>
     */
    public static CsvSchema emptySchema() {
        return builder().build();
    }

    /**
     * Helper method for constructing Builder that can be used to create modified
     * schema.
     */
    public Builder rebuild() {
        return new Builder(this);
    }

    public CsvSchema withUseHeader(boolean state) {
        return (_useHeader == state) ? this
                : new CsvSchema(_columns, state, _skipFirstDataRow,
                        _columnSeparator, _quoteChar, _escapeChar, _lineSeparator,
                        _columnsByName);
    }

    /**
     * Helper method for constructing and returning schema instance that
     * is similar to this one, except that it will be using header line.
     */
    public CsvSchema withHeader() {
        return withUseHeader(true);
    }

    /**
     * Helper method for constructing and returning schema instance that
     * is similar to this one, except that it will not be using header line.
     */
    public CsvSchema withoutHeader() {
        return withUseHeader(false);
    }

    public CsvSchema withSkipFirstDataRow(boolean state) {
        return (_skipFirstDataRow == state) ? this
                : new CsvSchema(_columns, _useHeader, state,
                        _columnSeparator, _quoteChar, _escapeChar, _lineSeparator,
                        _columnsByName);
    }

    public CsvSchema withColumnSeparator(char sep) {
        return (_columnSeparator == sep) ? this
                : new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                        sep, _quoteChar, _escapeChar, _lineSeparator,
                        _columnsByName);
    }

    public CsvSchema withQuoteChar(char c) {
        return (_quoteChar == c) ? this
                : new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                        _columnSeparator, c, _escapeChar, _lineSeparator,
                        _columnsByName);
    }

    public CsvSchema withEscapeChar(char c) {
        return (_escapeChar == c) ? this
                : new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                        _columnSeparator, _quoteChar, c, _lineSeparator,
                        _columnsByName);
    }

    public CsvSchema withoutEscapeChar() {
        return (_escapeChar == -1) ? this
                : new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                        _columnSeparator, _quoteChar, -1, _lineSeparator,
                        _columnsByName);
    }

    public CsvSchema withLineSeparator(String sep) {
        return new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                _columnSeparator, _quoteChar, _escapeChar, sep.toCharArray(),
                _columnsByName);
    }

    /**
     * Helper method for constructing and returning schema instance that
     * is similar to this one, except that it has no columns defined.
     */
    public CsvSchema withoutColumns() {
        // by-name lookup must be emptied along with the column array; otherwise
        // column(String) would keep returning columns that are no longer part
        // of the schema
        return new CsvSchema(NO_COLUMNS, _useHeader, _skipFirstDataRow,
                _columnSeparator, _quoteChar, _escapeChar, _lineSeparator,
                Collections.<String, Column>emptyMap());
    }

    /*
    /**********************************************************************
    /* Public API, FormatSchema
    /**********************************************************************
     */

    @Override
    public String getSchemaType() {
        return "CSV";
    }

    /*
    /**********************************************************************
    /* Public API, extended, properties
    /**********************************************************************
     */

    public boolean useHeader() { return _useHeader; }
    public boolean skipFirstDataRow() { return _skipFirstDataRow; }
    public char getColumnSeparator() { return _columnSeparator; }
    public char getQuoteChar() { return _quoteChar; }

    /**
     * @return Escape character, or -1 if none is used
     */
    public int getEscapeChar() { return _escapeChar; }

    /**
     * @return Line separator characters; note that the internal array is
     *   returned directly (not a copy), so callers should not modify it
     */
    public char[] getLineSeparator() { return _lineSeparator; }

    /*
    /**********************************************************************
    /* Public API, extended; column access
    /**********************************************************************
     */

    @Override
    public Iterator<Column> iterator() {
        return Arrays.asList(_columns).iterator();
    }

    /**
     * @return Number of columns this schema defines
     */
    public int size() { return _columns.length; }

    /**
     * @return Column at given position
     */
    public Column column(int index) {
        return _columns[index];
    }

    /**
     * @return Column with given name, or null if there is none
     */
    public Column column(String name) {
        return _columnsByName.get(name);
    }

    /**
     * Method for getting description of column definitions in
     * developer-readable form: a bracketed, comma-separated list of
     * double-quoted column names ("[]" if there are no columns).
     */
    public String getColumnDesc()
    {
        StringBuilder sb = new StringBuilder(100);
        // opening bracket is written unconditionally so that output is
        // balanced even when there are no columns
        sb.append('[');
        boolean first = true;
        for (Column col : _columns) {
            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            sb.append('"');
            sb.append(col.getName());
            sb.append('"');
        }
        sb.append(']');
        return sb.toString();
    }

    /*
    /**********************************************************************
    /* Other
    /**********************************************************************
     */

    @Override
    public String toString()
    {
        StringBuilder sb = new StringBuilder(150);
        sb.append("[CsvSchema: ")
            .append("columns=");
        // as with getColumnDesc(), always open the list so brackets balance
        sb.append('[');
        boolean first = true;
        for (Column col : _columns) {
            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            sb.append('"');
            sb.append(col.getName());
            sb.append("\"/");
            sb.append(col.getType());
        }
        sb.append(']');
        sb.append(']');
        return sb.toString();
    }
}