uk.ac.ucl.excites.sapelli.storage.model.columns.StringColumn.java Source code

Java tutorial

Introduction

Here is the source code for uk.ac.ucl.excites.sapelli.storage.model.columns.StringColumn.java

Source

/**
 * Sapelli data collection platform: http://sapelli.org
 * 
 * Copyright 2012-2016 University College London - ExCiteS group
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and 
 * limitations under the License.
 */

package uk.ac.ucl.excites.sapelli.storage.model.columns;

import java.io.IOException;
import java.nio.charset.Charset;
import java.text.ParseException;

import org.apache.commons.io.Charsets;

import uk.ac.ucl.excites.sapelli.shared.io.BitInputStream;
import uk.ac.ucl.excites.sapelli.shared.io.BitOutputStream;
import uk.ac.ucl.excites.sapelli.shared.io.text.CharsetHelpers;
import uk.ac.ucl.excites.sapelli.shared.util.IntegerRangeMapping;
import uk.ac.ucl.excites.sapelli.shared.util.StringUtils;
import uk.ac.ucl.excites.sapelli.storage.model.Column;
import uk.ac.ucl.excites.sapelli.storage.model.ComparableColumn;
import uk.ac.ucl.excites.sapelli.storage.model.ListLikeColumn;
import uk.ac.ucl.excites.sapelli.storage.util.InvalidValueException;
import uk.ac.ucl.excites.sapelli.storage.visitors.ColumnVisitor;

/**
 * A column for Strings.
 * <p>
 * In binary form a value is stored as a size field (holding the number of bytes the encoded String takes up),
 * followed by the bytes of the String as encoded with the column's {@link Charset}. The maximum length of a value
 * is therefore expressed in bytes; use the {@code ForCharacterCount(...)} factory methods to create a column which
 * is guaranteed to fit Strings of up to a given number of characters.
 * <p>
 * In textual form values are (by default) wrapped in, and escaped by means of, a serialisation delimiter character,
 * such that {@code null} and empty Strings can be told apart.
 * 
 * @author mstevens
 */
public class StringColumn extends ComparableColumn<String> implements ListLikeColumn<String> {

    //STATIC---------------------------------------------------------
    static private final long serialVersionUID = 2L;

    /** The {@link Charset} used when none is specified. */
    static public final Charset DEFAULT_CHARSET = Charsets.UTF_8;

    /** The serialisation delimiter used when none is specified (a single quote). */
    static public final char DEFAULT_SERIALISATION_DELIMITER = '\'';

    /**
     * The number of characters that can fit in the given number of bytes when the given Charset is used to encode them.
     * Worst case scenario is assumed in which every char needs the maximum number of bytes.
     * 
     * @param allowedBytes the number of available bytes
     * @param charset the {@link Charset} used to encode the characters
     * @return maximum number of characters that can fit (rounded down)
     */
    public static int MaximumCharsIn(int allowedBytes, Charset charset) {
        return (int) Math.floor(allowedBytes / (double) CharsetHelpers.GetMaxBytesPerChar(charset));
    }

    /**
     * The number of bytes needed to encode a String with length up to the given number of characters using the given Charset.
     * Worst case scenario is assumed in which every char needs the maximum number of bytes.
     * 
     * @param maxLengthChars the maximum String length, measured in characters
     * @param charset the {@link Charset} used to encode the characters
     * @return number of bytes needed (rounded up), capped at {@link Integer#MAX_VALUE}
     */
    public static int BytesNeededFor(int maxLengthChars, Charset charset) {
        // The multiplication is done in double precision so it cannot overflow before being capped:
        double bytesNeeded = Math.ceil(maxLengthChars * ((double) CharsetHelpers.GetMaxBytesPerChar(charset)));
        return (int) Math.min(bytesNeeded, Integer.MAX_VALUE);
    }

    /**
     * Creates a column using the {@link #DEFAULT_CHARSET}, no default value and the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthChars the maximum length, measured in characters, a String stored in the column will have
     * @return the new column
     * @throws IllegalArgumentException when {@code maxLengthChars} is not at least 1
     */
    public static StringColumn ForCharacterCount(String name, boolean optional, int maxLengthChars) {
        return ForCharacterCount(name, optional, maxLengthChars, (String) null);
    }

    /**
     * Creates a column using the {@link #DEFAULT_CHARSET} and the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthChars the maximum length, measured in characters, a String stored in the column will have
     * @param defaultValue the default value of the column (may be {@code null})
     * @return the new column
     * @throws IllegalArgumentException when {@code maxLengthChars} is not at least 1
     */
    public static StringColumn ForCharacterCount(String name, boolean optional, int maxLengthChars,
            String defaultValue) {
        // The defaultValue must be passed along explicitly, the (..., char) overload would discard it:
        return ForCharacterCount(name, optional, maxLengthChars, defaultValue, DEFAULT_SERIALISATION_DELIMITER);
    }

    /**
     * Creates a column using the {@link #DEFAULT_CHARSET} and no default value.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthChars the maximum length, measured in characters, a String stored in the column will have
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     * @return the new column
     * @throws IllegalArgumentException when {@code maxLengthChars} is not at least 1
     */
    public static StringColumn ForCharacterCount(String name, boolean optional, int maxLengthChars,
            char serialisationDelimiter) {
        return ForCharacterCount(name, optional, maxLengthChars, (String) null, serialisationDelimiter);
    }

    /**
     * Creates a column using the {@link #DEFAULT_CHARSET}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthChars the maximum length, measured in characters, a String stored in the column will have
     * @param defaultValue the default value of the column (may be {@code null})
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     * @return the new column
     * @throws IllegalArgumentException when {@code maxLengthChars} is not at least 1
     */
    public static StringColumn ForCharacterCount(String name, boolean optional, int maxLengthChars,
            String defaultValue, char serialisationDelimiter) {
        return ForCharacterCount(name, optional, maxLengthChars, DEFAULT_CHARSET, defaultValue,
                serialisationDelimiter);
    }

    /**
     * Creates a column with no default value and the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthChars the maximum length, measured in characters, a String stored in the column will have
     * @param charset the {@link Charset} to use to encode/decode Strings to/from bytes
     * @return the new column
     * @throws IllegalArgumentException when {@code maxLengthChars} is not at least 1
     */
    public static StringColumn ForCharacterCount(String name, boolean optional, int maxLengthChars,
            Charset charset) {
        return ForCharacterCount(name, optional, maxLengthChars, charset, null);
    }

    /**
     * Creates a column with the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthChars the maximum length, measured in characters, a String stored in the column will have
     * @param charset the {@link Charset} to use to encode/decode Strings to/from bytes
     * @param defaultValue the default value of the column (may be {@code null})
     * @return the new column
     * @throws IllegalArgumentException when {@code maxLengthChars} is not at least 1
     */
    public static StringColumn ForCharacterCount(String name, boolean optional, int maxLengthChars, Charset charset,
            String defaultValue) {
        return ForCharacterCount(name, optional, maxLengthChars, charset, defaultValue,
                DEFAULT_SERIALISATION_DELIMITER);
    }

    /**
     * Creates a column with no default value.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthChars the maximum length, measured in characters, a String stored in the column will have
     * @param charset the {@link Charset} to use to encode/decode Strings to/from bytes
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     * @return the new column
     * @throws IllegalArgumentException when {@code maxLengthChars} is not at least 1
     */
    public static StringColumn ForCharacterCount(String name, boolean optional, int maxLengthChars, Charset charset,
            char serialisationDelimiter) {
        return ForCharacterCount(name, optional, maxLengthChars, charset, null, serialisationDelimiter);
    }

    /**
     * Creates a column whose maximum size in bytes is derived (assuming the worst case, see
     * {@link #BytesNeededFor(int, Charset)}) from the given maximum length in characters.
     * All other {@code ForCharacterCount(...)} overloads end up here.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthChars the maximum length, measured in characters, a String stored in the column will have
     * @param charset the {@link Charset} to use to encode/decode Strings to/from bytes
     * @param defaultValue the default value of the column (may be {@code null})
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     * @return the new column
     * @throws IllegalArgumentException when {@code maxLengthChars} is not at least 1
     */
    public static StringColumn ForCharacterCount(String name, boolean optional, int maxLengthChars, Charset charset,
            String defaultValue, char serialisationDelimiter) {
        if (maxLengthChars <= 0) {
            throw new IllegalArgumentException(
                    "maxLenghthChars needs to be at least 1 character to make sense, given " + maxLengthChars
                            + " characters");
        }
        return new StringColumn(name, optional, BytesNeededFor(maxLengthChars, charset), charset, defaultValue,
                serialisationDelimiter);
    }

    /**
     * For upgrade purposes only.
     * <p>
     * Returns a version of the given column which reserves 3 bytes for each of the characters the given column can
     * hold. Columns which are {@code null} or do not use UTF-8 are returned as-is.
     * <p>
     * NOTE(review): the returned column gets the {@link #DEFAULT_SERIALISATION_DELIMITER} rather than the delimiter
     * of the given column; confirm that this is what the upgrade step requires.
     * 
     * @param stringColumn the column to convert (may be {@code null})
     * @see uk.ac.ucl.excites.sapelli.storage.db.sql.upgrades.Beta17UpgradeStep
     * @return the given column itself if it is {@code null} or not UTF-8 encoded, a new column otherwise
     */
    public static StringColumn Get3BytesPerCharUTF8Version(StringColumn stringColumn) {
        if (stringColumn == null || !Charsets.UTF_8.equals(stringColumn.getCharset())) {
            return stringColumn;
        }
        // else:
        return new StringColumn(stringColumn.name, stringColumn.optional, stringColumn.getMaximumChars() * 3,
                Charsets.UTF_8, stringColumn.defaultValue);
    }

    //DYNAMIC--------------------------------------------------------
    /** Name of the charset; kept because {@link #charset} itself is transient. */
    private final String charsetName;
    /** Transient because {@link Charset} is not serialisable; re-resolved from {@link #charsetName} in {@link #getCharset()}. */
    private transient Charset charset;
    /** Character used to wrap/escape values in their textual form. */
    private final char serialisationDelimiter;
    /** Maps the range [0, maximum number of bytes]; used to read/write the size of stored values. */
    private final IntegerRangeMapping sizeField;

    /**
     * Creates a column for Strings of the theoretical maximum length, using the {@link #DEFAULT_CHARSET},
     * no default value and the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     */
    public StringColumn(String name, boolean optional) {
        this(name, optional, null);
    }

    /**
     * Creates a column for Strings of the theoretical maximum length, using the {@link #DEFAULT_CHARSET}
     * and the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param defaultValue the default value of the column (may be {@code null})
     */
    public StringColumn(String name, boolean optional, String defaultValue) {
        this(name, optional, defaultValue, DEFAULT_SERIALISATION_DELIMITER);
    }

    /**
     * Creates a column for Strings of the theoretical maximum length, using the {@link #DEFAULT_CHARSET}
     * and no default value.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     */
    public StringColumn(String name, boolean optional, char serialisationDelimiter) {
        this(name, optional, (String) null, serialisationDelimiter);
    }

    /**
     * Creates a column for Strings of the theoretical maximum length, using the {@link #DEFAULT_CHARSET}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param defaultValue the default value of the column (may be {@code null})
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     */
    public StringColumn(String name, boolean optional, String defaultValue, char serialisationDelimiter) {
        this(name, optional,
                BytesNeededFor(Integer.MAX_VALUE /*theoretical max length of Java Strings*/, DEFAULT_CHARSET),
                DEFAULT_CHARSET, defaultValue, serialisationDelimiter);
    }

    /**
     * Creates a column using the {@link #DEFAULT_CHARSET}, no default value and the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthBytes the maximum length (measured in bytes, not chars!) a String stored in this column can have
     * @throws IllegalArgumentException when {@code maxLengthBytes} is not at least 1
     */
    public StringColumn(String name, boolean optional, int maxLengthBytes) {
        this(name, optional, maxLengthBytes, (String) null);
    }

    /**
     * Creates a column using the {@link #DEFAULT_CHARSET} and the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthBytes the maximum length (measured in bytes, not chars!) a String stored in this column can have
     * @param defaultValue the default value of the column (may be {@code null})
     * @throws IllegalArgumentException when {@code maxLengthBytes} is not at least 1
     */
    public StringColumn(String name, boolean optional, int maxLengthBytes, String defaultValue) {
        this(name, optional, maxLengthBytes, defaultValue, DEFAULT_SERIALISATION_DELIMITER);
    }

    /**
     * Creates a column using the {@link #DEFAULT_CHARSET} and no default value.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthBytes the maximum length (measured in bytes, not chars!) a String stored in this column can have
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     * @throws IllegalArgumentException when {@code maxLengthBytes} is not at least 1
     */
    public StringColumn(String name, boolean optional, int maxLengthBytes, char serialisationDelimiter) {
        this(name, optional, maxLengthBytes, (String) null, serialisationDelimiter);
    }

    /**
     * Creates a column using the {@link #DEFAULT_CHARSET}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthBytes the maximum length (measured in bytes, not chars!) a String stored in this column can have
     * @param defaultValue the default value of the column (may be {@code null})
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     * @throws IllegalArgumentException when {@code maxLengthBytes} is not at least 1
     */
    public StringColumn(String name, boolean optional, int maxLengthBytes, String defaultValue,
            char serialisationDelimiter) {
        this(name, optional, maxLengthBytes, DEFAULT_CHARSET, defaultValue, serialisationDelimiter);
    }

    /**
     * Creates a column with no default value and the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthBytes the maximum length (measured in bytes, not chars!) a String stored in this column can have
     * @param charset the {@link Charset} to use to encode/decode Strings to/from bytes
     * @throws IllegalArgumentException when {@code maxLengthBytes} is not at least 1
     * @throws NullPointerException when {@code charset} is {@code null}
     */
    public StringColumn(String name, boolean optional, int maxLengthBytes, Charset charset) {
        this(name, optional, maxLengthBytes, charset, null);
    }

    /**
     * Creates a column with the {@link #DEFAULT_SERIALISATION_DELIMITER}.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthBytes the maximum length (measured in bytes, not chars!) a String stored in this column can have
     * @param charset the {@link Charset} to use to encode/decode Strings to/from bytes
     * @param defaultValue the default value of the column (may be {@code null})
     * @throws IllegalArgumentException when {@code maxLengthBytes} is not at least 1
     * @throws NullPointerException when {@code charset} is {@code null}
     */
    public StringColumn(String name, boolean optional, int maxLengthBytes, Charset charset, String defaultValue) {
        this(name, optional, maxLengthBytes, charset, defaultValue, DEFAULT_SERIALISATION_DELIMITER);
    }

    /**
     * Creates a column with no default value.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthBytes the maximum length (measured in bytes, not chars!) a String stored in this column can have
     * @param charset the {@link Charset} to use to encode/decode Strings to/from bytes
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     * @throws IllegalArgumentException when {@code maxLengthBytes} is not at least 1
     * @throws NullPointerException when {@code charset} is {@code null}
     */
    public StringColumn(String name, boolean optional, int maxLengthBytes, Charset charset,
            char serialisationDelimiter) {
        this(name, optional, maxLengthBytes, charset, null, serialisationDelimiter);
    }

    /**
     * Main constructor, all other constructors end up here.
     * 
     * @param name the name of the column
     * @param optional whether or not the column is optional
     * @param maxLengthBytes the maximum length (measured in bytes, not chars!) a String stored in this column can have
     * @param charset the {@link Charset} to use to encode/decode Strings to/from bytes
     * @param defaultValue the default value of the column (may be {@code null})
     * @param serialisationDelimiter the character used to wrap/escape values in their textual form
     * @throws IllegalArgumentException when {@code maxLengthBytes} is not at least 1
     * @throws NullPointerException when {@code charset} is {@code null}
     */
    public StringColumn(String name, boolean optional, int maxLengthBytes, Charset charset, String defaultValue,
            char serialisationDelimiter) {
        super(name, optional, defaultValue);
        if (maxLengthBytes <= 0) {
            throw new IllegalArgumentException("maxLenghthBytes needs to be at least 1 byte to make sense, given "
                    + maxLengthBytes + " bytes");
        }
        if (charset == null) {
            throw new NullPointerException("charset cannot be null!");
        }
        this.charsetName = charset.name(); // !!! (because charset is transient, due to Charset not being Serializable)
        this.charset = charset;
        this.serialisationDelimiter = serialisationDelimiter;
        this.sizeField = new IntegerRangeMapping(0, maxLengthBytes); // empty Strings are allowed
    }

    /**
     * Creates a new column with the same name, optionality, maximum size (in bytes), charset, default value and
     * serialisation delimiter as this one.
     * 
     * @return the copy
     */
    @Override
    public StringColumn createCopy() {
        // The delimiter is passed explicitly because it is part of the restrictions compared by equalRestrictions():
        return new StringColumn(name, optional, getMaximumBytes(), getCharset(), defaultValue,
                serialisationDelimiter);
    }

    /**
     * @return the serialisationDelimiter
     */
    @Override
    public Character getSerialisationDelimiter() {
        return serialisationDelimiter;
    }

    /**
     * Parses a delimited value String.
     * 
     * @param valueString the String to parse, expected to be neither null nor empty and delimited by {@link #serialisationDelimiter} (meaning that if the resulting value is meant to be the empty String, and the serialisationDelimiter is "'", then the given valueString must be "''")
     * @return the parsed value
     * @throws ParseException when the given String is not properly delimited
     * 
     * @see #toString(String)
     */
    @Override
    public String parse(String valueString) throws ParseException {
        return parse(valueString, false); // delimited!
    }

    /**
     * If {@code undelimited} is {@code false} the given {@code valueString} is expected to be wrapped/escaped using {@link #serialisationDelimiter}s.
     * If {@code undelimited} is {@code true} the given {@code valueString} is returned as-is.
     * 
     * @param valueString the String to parse
     * @param undelimited whether the given String is *not* wrapped/escaped using {@link #serialisationDelimiter}s
     * @return the parsed value
     * @throws ParseException when {@code undelimited} is {@code false} and the given String is shorter than 2 characters, or does not begin or end with the {@link #serialisationDelimiter}
     * @throws NullPointerException when {@code undelimited} is {@code false} and {@code valueString} is {@code null}
     * 
     * @see uk.ac.ucl.excites.sapelli.storage.model.ListLikeColumn#parse(java.lang.String, boolean)
     * @see #toString(String, boolean)
     */
    @Override
    public String parse(String valueString, boolean undelimited)
            throws ParseException, IllegalArgumentException, NullPointerException {
        if (undelimited) {
            return valueString; // nothing to check or remove
        }
        // Perform delimiter checks:
        final int lastIndex = valueString.length() - 1;
        if (lastIndex < 1) { // at least 2 characters (opening & closing delimiter) are required
            throw new ParseException("String is not delimited by " + serialisationDelimiter + "s", 0);
        }
        if (valueString.charAt(0) != serialisationDelimiter) {
            throw new ParseException("String does not begin with " + serialisationDelimiter, 0);
        }
        if (valueString.charAt(lastIndex) != serialisationDelimiter) {
            throw new ParseException("String does not end with " + serialisationDelimiter, lastIndex);
        }
        // Remove serialisationDelimiters:
        return StringUtils.deescapeByDoublingAndWrapping(valueString, serialisationDelimiter);
    }

    /**
     * The given value will be wrapped/escaped using {@link #serialisationDelimiter}s to preserve the difference between a null String and an empty String value (because empty Strings are treated as null in {@link Column})
     * Occurrences of the delimiter *inside* the value will be doubled. 
     * 
     * @param value the value to convert
     * @return the delimited String representation of the value
     * 
     * @see uk.ac.ucl.excites.sapelli.storage.model.Column#toString(java.lang.Object)
     */
    @Override
    public String toString(String value) {
        return toString(value, false); // delimited!
    }

    /**
     * If {@code undelimited} is {@code false} given value will be wrapped/escaped using {@link #serialisationDelimiter}s to preserve the difference between a null String and an empty String value (because empty Strings are treated as null in {@link Column}).
     * Occurrences of the delimiter *inside* the value will then be doubled.
     * If {@code undelimited} is {@code true} the given value is returned as-is.
     * 
     * @param value the value to convert
     * @param undelimited whether the result should *not* be wrapped/escaped using {@link #serialisationDelimiter}s
     * @return the String representation of the value
     * 
     * @see uk.ac.ucl.excites.sapelli.storage.model.ListLikeColumn#toString(java.lang.Object, boolean)
     */
    @Override
    public String toString(String value, boolean undelimited) {
        if (undelimited) {
            return value;
        }
        return StringUtils.escapeByDoublingAndWrapping(value, serialisationDelimiter, /*force:*/ true);
    }

    /**
     * Checks whether the given value can be stored in this column; delegates to {@code isValidValue(value)}.
     * 
     * @param value the value to check
     * @return whether or not the value is valid for this column
     */
    public boolean fits(String value) {
        return isValidValue(value);
    }

    /**
     * Checks for size restriction violations
     * 
     * @param value the value to validate
     * @throws InvalidValueException when encoding the value with the column's charset takes more than {@link #getMaximumBytes()} bytes
     * 
     * @see uk.ac.ucl.excites.sapelli.storage.model.Column#validate(java.lang.Object)
     */
    @Override
    protected void validate(String value) throws InvalidValueException {
        final int maxBytes = getMaximumBytes();
        final int bytesNeeded = StringUtils.sizeBytes(value, getCharset());
        if (bytesNeeded > maxBytes) {
            throw new InvalidValueException("String \"" + value + "\" is too long (it would take " + bytesNeeded
                    + " bytes, while the maximum allowed is " + maxBytes + " bytes).", this);
        }
    }

    /**
     * Writes the size of the encoded value (in bytes) followed by the encoded value itself.
     * The {@code lossless} flag is not used.
     */
    @Override
    protected void write(String value, BitOutputStream bitStream, boolean lossless) throws IOException {
        final Charset cs = getCharset();
        // Write length:
        sizeField.write(StringUtils.sizeBytes(value, cs), bitStream);
        // Write actual string:
        bitStream.write(value, cs);
    }

    /**
     * Reads the size of the encoded value (in bytes) followed by the encoded value itself.
     * The {@code lossless} flag is not used.
     */
    @Override
    protected String read(BitInputStream bitStream, boolean lossless) throws IOException {
        //Read length:
        final int numberOfBytes = sizeField.read(bitStream).intValue();
        //Read actual string:
        return bitStream.readString(numberOfBytes, getCharset());
    }

    /**
     * @return the size of the size field alone (the case in which the stored String is empty)
     */
    @Override
    protected int getMinimumValueSize(boolean lossless) {
        return sizeField.size(); // when stored string is empty: just the size field
    }

    /**
     * @return the size of the size field plus the number of bits in {@link #getMaximumBytes()} bytes
     */
    @Override
    protected int getMaximumValueSize(boolean lossless) {
        return sizeField.size() + (getMaximumBytes() * Byte.SIZE);
    }

    /**
     * @return the maximum length, measured in bytes, a String stored in this column can have
     */
    public int getMaximumBytes() {
        return sizeField.highBound().intValue();
    }

    /**
     * Worst case scenario is assumed in which every char needs the maximum number of bytes
     * 
     * @return the number of characters guaranteed to fit in {@link #getMaximumBytes()} bytes
     * @see #MaximumCharsIn(int, Charset)
     */
    public int getMaximumChars() {
        return MaximumCharsIn(getMaximumBytes(), getCharset());
    }

    /* (non-Javadoc)
     * @see uk.ac.ucl.excites.sapelli.storage.model.Column#canBeLossy()
     */
    @Override
    public boolean canBeLossy() {
        return false;
    }

    /**
     * Two StringColumns have equal restrictions when their maximum size in bytes, their serialisation delimiter and
     * their charset are equal. A column which is not a StringColumn never has equal restrictions.
     */
    @Override
    protected boolean equalRestrictions(Column<String> otherColumn) {
        if (!(otherColumn instanceof StringColumn)) {
            return false;
        }
        StringColumn that = (StringColumn) otherColumn;
        return this.getMaximumBytes() == that.getMaximumBytes()
                && this.serialisationDelimiter == that.serialisationDelimiter
                && this.getCharset().equals(that.getCharset());
    }

    /**
     * @return a new String instance with the same contents as the given value
     */
    @Override
    protected String copy(String value) {
        return new String(value);
    }

    @Override
    public void accept(ColumnVisitor visitor) {
        visitor.visit(this);
    }

    /**
     * Compares using {@link String#compareTo(String)}.
     */
    @Override
    protected int compareNonNullValues(String lhs, String rhs) {
        return lhs.compareTo(rhs);
    }

    /**
     * Extends the hash code of the super class with the charset, the serialisation delimiter and the size field.
     */
    @Override
    public int hashCode() {
        int hash = super.hashCode();
        hash = 31 * hash + getCharset().hashCode();
        hash = 31 * hash + serialisationDelimiter;
        hash = 31 * hash + sizeField.hashCode();
        return hash;
    }

    /**
     * @return the {@link Charset} used to encode/decode Strings to/from bytes
     */
    public Charset getCharset() {
        if (this.charset == null) {
            // needed because charset member variable is transient (because Charset is not a Serializable class)
            this.charset = Charset.forName(charsetName);
        }
        return charset;
    }

    @Override
    public Class<String> getType() {
        return String.class;
    }

}