org.apache.jackrabbit.core.persistence.util.BundleWriter.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.jackrabbit.core.persistence.util.BundleWriter.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.persistence.util;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigDecimal;
import java.util.Calendar;
import java.util.Collection;
import java.util.GregorianCalendar;

import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;

import org.apache.commons.io.IOExceptionWithCause;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.core.id.NodeId;
import org.apache.jackrabbit.core.value.InternalValue;
import org.apache.jackrabbit.core.persistence.util.NodePropBundle.ChildNodeEntry;
import org.apache.jackrabbit.core.persistence.util.NodePropBundle.PropertyEntry;
import org.apache.jackrabbit.spi.Name;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Bundle serializer.
 *
 * @see BundleReader
 */
class BundleWriter {

    /** Logger instance (never reassigned, hence final) */
    private static final Logger log = LoggerFactory.getLogger(BundleWriter.class);

    /** Binding that supplies the blob store / data store configuration */
    private final BundleBinding binding;

    /** Stream to which the serialized bundle is written */
    private final DataOutputStream out;

    /**
     * The default namespace and the first six other namespaces used in this
     * bundle. Used by the {@link #writeName(Name)} method to keep track of
     * already seen namespaces.
     */
    private final String[] namespaces =
            // NOTE: The length of this array must be seven
            { Name.NS_DEFAULT_URI, null, null, null, null, null, null };

    /**
     * Creates a new bundle serializer.
     *
     * @param binding bundle binding
     * @param stream stream to which the bundle will be written
     * @throws IOException if an I/O error occurs.
     */
    public BundleWriter(BundleBinding binding, OutputStream stream) throws IOException {
        assert namespaces.length == 7;
        this.binding = binding;
        this.out = new DataOutputStream(stream);
        this.out.writeByte(BundleBinding.VERSION_CURRENT);
    }

    /**
     * Serializes a <code>NodePropBundle</code> to the data output stream and
     * stores the number of bytes written as the size of the bundle.
     * <p>
     * The primary type name, the parent id and the modification count are
     * written first, followed by a header byte that packs the truncated
     * element counts:
     * <pre>
     * bit 7    : 1 if there is at least one mixin type
     * bits 4-6 : number of properties, truncated at 7
     * bits 2-3 : number of child nodes, truncated at 3
     * bit 1    : 1 if the shared set is not empty
     * bit 0    : 1 if the node is referenceable
     * </pre>
     * Whenever a count reaches its truncation limit, the remainder
     * (count minus limit) is written as a variable-length integer right
     * before the corresponding entries.
     *
     * @param bundle the bundle to serialize
     * @throws IOException if an I/O error occurs.
     */
    public void writeBundle(NodePropBundle bundle) throws IOException {
        final long startOffset = out.size();

        // primaryType
        writeName(bundle.getNodeTypeName());

        // parentUUID; a missing parent is represented by a placeholder id
        final NodeId parentId = bundle.getParentId();
        writeNodeId(parentId != null ? parentId : BundleBinding.NULL_PARENT_ID);

        // modification count
        writeVarInt(bundle.getModCount());

        final Collection<Name> mixinNames = bundle.getMixinTypeNames();
        final Collection<PropertyEntry> propertyEntries = bundle.getPropertyEntries();
        final Collection<ChildNodeEntry> childEntries = bundle.getChildNodeEntries();
        final Collection<NodeId> sharedSet = bundle.getSharedSet();

        final int mixinCount = mixinNames.size();
        final int propertyCount = propertyEntries.size();
        final int childCount = childEntries.size();
        final int sharedCount = sharedSet.size();
        final int referenceableBit = bundle.isReferenceable() ? 1 : 0;

        // pack the truncated counts and the referenceable flag into one byte
        int header = Math.min(mixinCount, 1) << 7;
        header |= Math.min(propertyCount, 7) << 4;
        header |= Math.min(childCount, 3) << 2;
        header |= Math.min(sharedCount, 1) << 1;
        header |= referenceableBit;
        out.writeByte(header);

        // mixin types
        writeVarInt(mixinCount, 1);
        for (Name mixin : mixinNames) {
            writeName(mixin);
        }

        // properties
        writeVarInt(propertyCount, 7);
        for (PropertyEntry entry : propertyEntries) {
            writeState(entry);
        }

        // child nodes (list of name/uuid pairs)
        writeVarInt(childCount, 3);
        for (ChildNodeEntry entry : childEntries) {
            writeName(entry.getName());
            writeNodeId(entry.getId());
        }

        // shared set
        writeVarInt(sharedCount, 1);
        for (NodeId sharedId : sharedSet) {
            writeNodeId(sharedId);
        }

        // the bundle size is the number of bytes written by this call
        bundle.setSize(out.size() - startOffset);
    }

    /**
     * Serializes a property entry. The serialization begins with the
     * property name followed by a single byte that encodes the type and
     * multi-valuedness of the property:
     * <pre>
     * +-------------------------------+
     * |   mv count    |     type      |
     * +-------------------------------+
     * </pre>
     * <p>
     * The lower four bits encode the property type (0-12 in JCR 2.0) and
     * higher bits indicate whether this is a multi-valued property and how
     * many property values there are. A value of 0 is reserved for
     * single-valued properties (that are guaranteed to always have just a
     * single value), and all non-zero values indicate a multi-valued property.
     * <p>
     * In multi-valued properties the exact value of the "mv count" field is
     * the number of property values plus one and truncated at 15 (the highest
     * four-bit value). If there are 14 or more (14 + 1 == 15) property values,
     * then the number of additional values is serialized as a variable-length
     * integer (see {@link #writeVarInt(int)}) right after this byte.
     * <p>
     * The modification count of the property state is written next as a
     * variable-length integer, followed by the serializations of all the
     * values of this property.
     * <p>
     * Note that serializing a binary value may replace the corresponding
     * element of the entry's value array (with an in-memory or blob store
     * backed value) and discard the original value instance.
     *
     * @param state the property entry to store
     * @throws IOException if an I/O error occurs, if the property type does
     *         not fit in four bits or is not supported, or if a
     *         single-valued property does not have exactly one value.
     */
    private void writeState(NodePropBundle.PropertyEntry state) throws IOException {
        writeName(state.getName());

        InternalValue[] values = state.getValues();

        int type = state.getType();
        // the type has to fit into the lower four bits of the header byte
        if (type < 0 || type > 0xf) {
            throw new IOException("Illegal property type " + type);
        }
        if (state.isMultiValued()) {
            // "mv count" is the number of values plus one, truncated at 15
            int len = values.length + 1;
            if (len < 0x0f) {
                out.writeByte(len << 4 | type);
            } else {
                out.writeByte(0xf0 | type);
                writeVarInt(len - 0x0f);
            }
        } else {
            if (values.length != 1) {
                throw new IOException(
                        "Single values property with " + values.length + " values: " + state.getName());
            }
            // an "mv count" of zero marks a single-valued property
            out.writeByte(type);
        }

        writeVarInt(state.getModCount());

        // values
        for (int i = 0; i < values.length; i++) {
            InternalValue val = values[i];
            switch (type) {
            case PropertyType.BINARY:
                try {
                    long size = val.getLength();
                    if (val.isInDataStore()) {
                        // only the string form of the value is written
                        out.writeInt(BundleBinding.BINARY_IN_DATA_STORE);
                        writeString(val.toString());
                    } else if (binding.dataStore != null) {
                        // a data store is configured but this value is not
                        // in it: write the content inline
                        writeSmallBinary(val, state, i);
                    } else if (size < 0) {
                        log.warn("Blob has negative size. Potential loss of data. " + "id={} idx={}", state.getId(),
                                String.valueOf(i));
                        // write a zero-length binary and replace the value
                        out.writeInt(0);
                        values[i] = InternalValue.create(new byte[0]);
                        val.discard();
                    } else if (size > binding.getMinBlobSize()) {
                        // special handling required for binary value:
                        // spool binary value to file in blob store
                        out.writeInt(BundleBinding.BINARY_IN_BLOB_STORE);
                        String blobId = state.getBlobId(i);
                        if (blobId == null) {
                            BLOBStore blobStore = binding.getBlobStore();
                            try {
                                InputStream in = val.getStream();
                                try {
                                    blobId = blobStore.createId(state.getId(), i);
                                    blobStore.put(blobId, in, size);
                                    state.setBlobId(blobId, i);
                                } finally {
                                    IOUtils.closeQuietly(in);
                                }
                            } catch (Exception e) {
                                String msg = "Error while storing blob. id=" + state.getId() + " idx=" + i
                                        + " size=" + size;
                                log.error(msg, e);
                                throw new IOExceptionWithCause(msg, e);
                            }
                            try {
                                // replace value instance with value
                                // backed by resource in blob store and delete temp file
                                if (blobStore instanceof ResourceBasedBLOBStore) {
                                    values[i] = InternalValue
                                            .create(((ResourceBasedBLOBStore) blobStore).getResource(blobId));
                                } else {
                                    values[i] = InternalValue.create(blobStore.get(blobId));
                                }
                            } catch (Exception e) {
                                // the blob itself has been stored; only the
                                // in-memory value is replaced by an empty one
                                log.error("Error while reloading blob. truncating. id=" + state.getId() + " idx="
                                        + i + " size=" + size, e);
                                values[i] = InternalValue.create(new byte[0]);
                            }
                            val.discard();
                        }
                        // store id of blob as property value
                        writeString(blobId); // value
                    } else {
                        // small enough to be stored inline in the bundle
                        byte[] data = writeSmallBinary(val, state, i);
                        // replace value instance with a value backed by the
                        // bytes just read and discard the original
                        values[i] = InternalValue.create(data);
                        val.discard();
                    }
                } catch (RepositoryException e) {
                    String msg = "Error while storing blob. id=" + state.getId() + " idx=" + i + " value=" + val;
                    log.error(msg, e);
                    throw new IOExceptionWithCause(msg, e);
                }
                break;
            case PropertyType.DOUBLE:
                try {
                    out.writeDouble(val.getDouble());
                } catch (RepositoryException e) {
                    throw convertToIOException(type, e);
                }
                break;
            case PropertyType.DECIMAL:
                try {
                    writeDecimal(val.getDecimal());
                } catch (RepositoryException e) {
                    throw convertToIOException(type, e);
                }
                break;
            case PropertyType.LONG:
                try {
                    writeVarLong(val.getLong());
                } catch (RepositoryException e) {
                    throw convertToIOException(type, e);
                }
                break;
            case PropertyType.BOOLEAN:
                try {
                    out.writeBoolean(val.getBoolean());
                } catch (RepositoryException e) {
                    throw convertToIOException(type, e);
                }
                break;
            case PropertyType.NAME:
                try {
                    writeName(val.getName());
                } catch (RepositoryException e) {
                    throw convertToIOException(type, e);
                }
                break;
            case PropertyType.WEAKREFERENCE:
            case PropertyType.REFERENCE:
                writeNodeId(val.getNodeId());
                break;
            case PropertyType.DATE:
                try {
                    writeDate(val.getCalendar());
                } catch (RepositoryException e) {
                    throw convertToIOException(type, e);
                }
                break;
            case PropertyType.STRING:
            case PropertyType.PATH:
            case PropertyType.URI:
                writeString(val.toString());
                break;
            default:
                throw new IOException("Unknown property type: " + type);
            }
        }
    }

    /**
     * Logs a value conversion failure and wraps it in an
     * <code>IOException</code> that names the affected property type.
     *
     * @param propertyType the JCR property type of the value being written
     * @param e the original failure
     * @return the exception to be thrown by the caller
     */
    private static IOException convertToIOException(int propertyType, Exception e) {
        final String message =
                "Unexpected error for property type " + PropertyType.nameFromValue(propertyType) + " value.";
        log.error(message, e);
        return new IOExceptionWithCause(message, e);
    }

    /**
     * Writes a binary value inline and returns its content. The length is
     * written first as a four-byte integer, followed by the bytes read from
     * the stream of the value.
     *
     * @param value the binary value
     * @param state the property state (for error messages)
     * @param i the index (for error messages)
     * @return the data
     * @throws IOException if the data could not be read or written
     */
    private byte[] writeSmallBinary(InternalValue value, NodePropBundle.PropertyEntry state, int i)
            throws IOException {
        try {
            final int length = (int) value.getLength();
            out.writeInt(length);

            final byte[] content = new byte[length];
            final DataInputStream source = new DataInputStream(value.getStream());
            try {
                source.readFully(content);
            } finally {
                IOUtils.closeQuietly(source);
            }

            out.write(content);
            return content;
        } catch (Exception e) {
            // any failure (not just I/O) is logged and reported as IOException
            String msg = "Error while storing blob. id=" + state.getId() + " idx=" + i + " value=" + value;
            log.error(msg, e);
            throw new IOExceptionWithCause(msg, e);
        }
    }

    /**
     * Serializes a node identifier as two 64-bit values: the most
     * significant bits first, then the least significant bits.
     *
     * @param id the node id
     * @throws IOException if an I/O error occurs.
     */
    private void writeNodeId(NodeId id) throws IOException {
        final long high = id.getMostSignificantBits();
        out.writeLong(high);
        final long low = id.getLeastSignificantBits();
        out.writeLong(low);
    }

    /**
     * Serializes a BigDecimal. A boolean flag is written first to tell
     * whether a value is present; for a non-<code>null</code> decimal the
     * flag is followed by its string representation (see
     * {@link #writeString(String)}).
     *
     * @param decimal the decimal number, possibly <code>null</code>
     * @throws IOException if an I/O error occurs.
     */
    private void writeDecimal(BigDecimal decimal) throws IOException {
        final boolean present = decimal != null;
        out.writeBoolean(present);
        if (present) {
            // TODO more efficient serialization format
            writeString(decimal.toString());
        }
    }

    /**
     * Serializes a name. The name encoding works as follows:
     * <p>
     * First; if the name is known by the {@link BundleNames} class (this
     * includes the <code>null</code> name), then the name is serialized
     * as a single byte using the following format.
     * <pre>
     * +-------------------------------+
     * | 0 |    common name index      |
     * +-------------------------------+
     * </pre>
     * <p>
     * Second; if the name is not known, it gets serialized as a
     * variable-length field whose first byte looks like this:
     * <pre>
     * +-------------------------------+
     * | 1 | ns index  |  name length  |
     * +-------------------------------+
     * </pre>
     * <p>
     * The three-bit namespace index identifies the namespace of the name.
     * The serializer keeps track of the default namespace (value 0) and at
     * most six other namespaces (values 1-6), in the order they appear
     * in the bundle. When one of these six custom namespaces first appears
     * in the bundle, then the namespace URI is written using
     * {@link #writeString(String)} right after this byte.
     * Later uses of such a namespace simply refer back to the already
     * written namespace URI string. Any other namespaces are identified with
     * value 7 and always written to the bundle after this byte.
     * <p>
     * The four-bit name length field indicates the length (in UTF-8 bytes)
     * of the local part of the name. Since zero-length local names are not
     * allowed, the length is first decremented by one before storing in this
     * field. The UTF-8 byte sequence is written out after this byte and the
     * possible namespace URI string. If the length of the local name is
     * larger than 15 (i.e. would be stored as 0x0f or more), then the value
     * 0x0f is stored as the name length and the name string is written as
     * UTF-8 using {@link #writeBytes(byte[], int)} with a base length of
     * 0x10 (0x0f + 1).
     *
     * @param name the name
     * @throws IOException if an I/O error occurs or the local name is empty.
     */
    private void writeName(Name name) throws IOException {
        final int commonIndex = BundleNames.nameToIndex(name);
        if (commonIndex != -1) {
            // well-known name: a single byte with the high bit cleared
            assert 0 <= commonIndex && commonIndex < 0x80;
            out.writeByte(commonIndex);
            return;
        }

        // Find the slot of this namespace: either the slot that already
        // holds the URI, the first free slot, or namespaces.length (7)
        // when all slots are taken by other namespaces.
        final String uri = name.getNamespaceURI();
        int slot = 0;
        for (; slot < namespaces.length; slot++) {
            final String known = namespaces[slot];
            if (known == null || known.equals(uri)) {
                break;
            }
        }

        final String localName = name.getLocalName();
        if (localName.length() == 0) {
            throw new IOException("Attempt to write an empty local name: " + name);
        }
        final byte[] utf8 = localName.getBytes("UTF-8");
        final int lengthField = Math.min(utf8.length - 1, 0x0f);

        out.writeByte(0x80 | slot << 4 | lengthField);

        // the URI is written on first use of a slot and always for overflow
        final boolean overflow = slot == namespaces.length;
        if (overflow || namespaces[slot] == null) {
            writeString(uri);
            if (!overflow) {
                namespaces[slot] = uri;
            }
        }

        if (lengthField == 0x0f) {
            // long name: explicit length relative to the base of 0x10
            writeBytes(utf8, 0x0f + 1);
        } else {
            out.write(utf8);
        }
    }

    /**
     * Serializes an integer using a variable-length encoding that favors
     * small positive numbers. The serialization consists of one to five
     * bytes of the following format:
     * <pre>
     * +-------------------------------+
     * | c | 7 least significant bits  |
     * +-------------------------------+
     * </pre>
     * <p>
     * If the given integer fits in seven bits (i.e. the value is between
     * 0 and 127, inclusive), then it is written as-is in a single byte.
     * Otherwise the continuation flag <code>c</code> is set and the least
     * significant seven bits are written together with the flag as a single
     * byte. The integer is then shifted right seven bits (unsigned) and the
     * process continues from the beginning.
     * <p>
     * This format uses a single byte for values 0-127, two bytes for
     * 128-16383, three for 16384-2097151, four for 2097152-268435455
     * and five bytes for all other 32-bit numbers (including negative ones).
     *
     * @param value integer value
     * @throws IOException if an I/O error occurs
     */
    private void writeVarInt(int value) throws IOException {
        int remaining = value;
        // emit continuation bytes while bits above the low seven are set
        while ((remaining & ~0x7f) != 0) {
            out.writeByte((remaining & 0x7f) | 0x80);
            remaining >>>= 7; // unsigned shift
        }
        out.writeByte(remaining);
    }

    /**
     * Writes the part of a count that exceeds the given base as a
     * variable-length integer (see {@link #writeVarInt(int)}). Nothing is
     * written when the value is smaller than the base.
     *
     * @param value the full value
     * @param base the base that is subtracted from the value
     * @throws IOException if an I/O error occurs
     */
    private void writeVarInt(int value, int base) throws IOException {
        if (value < base) {
            return;
        }
        writeVarInt(value - base);
    }

    /**
     * Serializes a long value using a variable length encoding like the
     * one used by {@link #writeVarInt(int)} for integer values. Before
     * writing out, the value is first normalized to an unsigned value:
     * the sign is moved to the lowest bit and the remaining bits of a
     * negative value are inverted. This normalization step maximizes the
     * number of zero high order bits for typical small values (positive or
     * negative), and thus keeps the serialization short.
     *
     * @param value long value
     * @throws IOException if an I/O error occurs
     */
    private void writeVarLong(long value) throws IOException {
        // Normalize to an unsigned value with the sign as the lowest bit
        long remaining = value < 0 ? (~value << 1 | 1) : value << 1;

        // emit continuation bytes while bits above the low seven are set
        while ((remaining & ~0x7fL) != 0) {
            out.writeByte((int) (remaining & 0x7f) | 0x80);
            remaining >>>= 7; // unsigned shift
        }
        out.writeByte((int) remaining);
    }

    /**
     * Serializes a JCR date value using the {@link #writeVarLong(long)}
     * serialization on a special 64-bit date encoding. This encoding maps
     * the <code>sYYYY-MM-DDThh:mm:ss.sssTZD</code> date format used by
     * JCR to an as small 64 bit integer (positive or negative) as possible,
     * while preserving full accuracy (including time zone offsets) and
     * favouring common levels of accuracy (per minute, hour and day) over
     * full millisecond level detail.
     * <p>
     * Each date value is mapped to separate timestamp and timezone fields,
     * both of whose lengths are variable:
     * <pre>
     * +----- ... ------- ... --+
     * |  timestamp  | timezone |
     * +----- ... ------- ... --+
     * </pre>
     * <p>
     * The type and length of the timezone field can be determined by looking
     * at the two least significant bits of the value:
     * <dl>
     *   <dt><code>?0</code></dt>
     *   <dd>
     *     UTC time. The length of the timezone field is just one bit,
     *     i.e. the second bit is already a part of the timestamp field.
     *   </dd>
     *   <dt><code>01</code></dt>
     *   <dd>
     *     The offset is counted as hours from UTC, and stored as the number
     *     of hours (positive or negative) in the next 5 bits (range from
     *     -16 to +15 hours), making the timezone field 7 bits long in total.
     *   </dd>
     *   <dt><code>11</code></dt>
     *   <dd>
     *     The offset is counted as hours and minutes from UTC, and stored
     *     as the total minute offset (positive or negative) in the next
     *     11 bits (range from -17 to +17 hours), making the timezone field
     *     13 bits long in total.
     *   </dd>
     * </dl>
     * <p>
     * The remaining 51-63 bits of the encoded value make up the timestamp
     * field that also uses the two least significant bits to indicate the
     * type and length of the field:
     * <dl>
     *   <dt><code>00</code></dt>
     *   <dd>
     *     <code>sYYYY-MM-DDT00:00:00.000</code>, i.e. midnight of the
     *     specified date. The next 9 bits encode the day within the year
     *     (starting from 1, maximum value 366) and the remaining bits are
     *     used for the year, stored as an offset from year 2010.
     *   </dd>
     *   <dt><code>01</code></dt>
     *   <dd>
     *     <code>sYYYY-MM-DDThh:00:00.000</code>, i.e. at the hour. The
     *     next 5 bits encode the hour within the day (starting from 0,
     *     maximum value 23) and the remaining bits are used as described
     *     above for the date.
     *   </dd>
     *   <dt><code>10</code></dt>
     *   <dd>
     *     <code>sYYYY-MM-DDThh:mm:00.000</code>, i.e. at the minute. The
     *     next 11 bits encode the minute within the day (starting from 0,
     *     maximum value 1439) and the remaining bits are used as described
     *     above for the date.
     *   </dd>
     *   <dt><code>11</code></dt>
     *   <dd>
     *     <code>sYYYY-MM-DDThh:mm:ss.sss</code>, i.e. full millisecond
     *     accuracy. The next 30 bits encode the millisecond within the
     *     day (starting from 0, maximum value 86399999) and the remaining
     *     bits are used as described above for the date.
     *   </dd>
     * </dl>
     * <p>
     * With full timezone and millisecond accuracies, this encoding leaves
     * 10 bits (64 - 9 - 30 - 2 - 11 - 2) for the date offset, which allows
     * for representation of all timestamps between years 1498 and 2521.
     * Timestamps outside this range and with a minute-level timezone offset
     * are automatically truncated to minute-level accuracy to support the
     * full range of years -9999 to 9999 specified in JCR.
     * <p>
     * Note that the year, day of year, and time of day values are stored
     * as separate bit sequences to avoid problems with changing leap second
     * or leap year definitions. Bit fields are used for better encoding and
     * decoding performance than what would be possible with the slightly more
     * space efficient mechanism of using multiplication and modulo divisions
     * to separate the different timestamp fields.
     *
     * @param value date value
     * @throws IOException if an I/O error occurs
     */
    private void writeDate(Calendar value) throws IOException {
        int year = value.get(Calendar.YEAR);
        final boolean beforeCommonEra =
                value.isSet(Calendar.ERA) && value.get(Calendar.ERA) == GregorianCalendar.BC;
        if (beforeCommonEra) {
            year = 1 - year; // convert to an astronomical year
        }
        final int yearOffset = year - 2010; // use a recent offset NOTE: do not change this!

        final int dayOfYear = value.get(Calendar.DAY_OF_YEAR);
        final int hour = value.get(Calendar.HOUR_OF_DAY);
        final int minute = value.get(Calendar.MINUTE);
        final int second = value.get(Calendar.SECOND);
        final int millis = value.get(Calendar.MILLISECOND);

        // timezone offset in whole minutes, split into hours and the rest
        final int zoneMinutes = value.getTimeZone().getOffset(value.getTimeInMillis()) / (60 * 1000);
        final int zoneHours = zoneMinutes / 60;
        final int zoneRemainder = zoneMinutes % 60;

        // date part: year offset followed by the 9-bit day of year
        final int datePart = (yearOffset << 9) | (dayOfYear & 0x01ff);
        long ts = datePart;

        // Full accuracy is only possible when the year fits in 10 bits or
        // when the timezone needs no minute-level offset.
        final boolean subMinute = millis != 0 || second != 0;
        final boolean fullAccuracyFits = (-512 <= yearOffset && yearOffset < 512) || zoneRemainder == 0;

        if (subMinute && fullAccuracyFits) {
            final int millisOfDay = ((hour * 60 + minute) * 60 + second) * 1000 + millis;
            ts = (ts << 30 | (millisOfDay & 0x3fffffff)) << 2 | 3; // 30 bits
        } else if (minute != 0) {
            final int minuteOfDay = hour * 60 + minute;
            ts = (ts << 11 | (minuteOfDay & 0x07ff)) << 2 | 2; // 11 bits
        } else if (hour != 0) {
            ts = (ts << 5 | (hour & 0x1f)) << 2 | 1; // 5 bits
        } else {
            ts <<= 2;
        }

        // append the timezone field
        final long encoded;
        if (zoneRemainder != 0) {
            encoded = (ts << 11 | (zoneMinutes & 0x07ff)) << 2 | 3; // 11 bits
        } else if (zoneHours != 0) {
            encoded = (ts << 5 | (zoneHours & 0x1f)) << 2 | 1; // 5 bits
        } else {
            encoded = ts << 1;
        }
        writeVarLong(encoded);
    }

    /**
     * Serializes a string in UTF-8. The length of the UTF-8 byte sequence
     * is first written as a variable-length integer (see
     * {@link #writeVarInt(int)}), and then the sequence itself is written.
     *
     * @param value string value
     * @throws IOException if an I/O error occurs
     */
    private void writeString(String value) throws IOException {
        final byte[] utf8 = value.getBytes("UTF-8");
        writeBytes(utf8, 0);
    }

    /**
     * Serializes the given array of bytes. The length of the byte array,
     * reduced by the given base, is first written as a
     * {@link #writeVarInt(int) variable length integer}, followed by the
     * given bytes.
     *
     * @param bytes the bytes to be serialized
     * @param base optional base length; must not exceed the array length
     * @throws IOException if an I/O error occurs
     */
    private void writeBytes(byte[] bytes, int base) throws IOException {
        assert bytes.length >= base;
        final int lengthAboveBase = bytes.length - base;
        writeVarInt(lengthAboveBase);
        out.write(bytes, 0, bytes.length);
    }

}