Java tutorial
/*
Copyright (c) 2013 James Ahlborn

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
USA
*/

package com.healthmarketscience.jackcess.impl;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.sql.Blob;
import java.sql.SQLException;
import java.sql.SQLFeatureNotSupportedException;
import java.text.Normalizer;
import java.util.EnumSet;
import java.util.Set;
import java.util.regex.Pattern;

import com.healthmarketscience.jackcess.DataType;
import com.healthmarketscience.jackcess.util.OleBlob;
import static com.healthmarketscience.jackcess.util.OleBlob.*;
import org.apache.commons.lang.builder.ToStringBuilder;

/**
 * Utility code for working with OLE data.
 *
 * @author James Ahlborn
 * @usage _advanced_class_
 */
public class OleUtil {

  /**
   * Interface used to allow optional inclusion of the poi library for working
   * with compound ole data.
   */
  interface CompoundPackageFactory {
    public ContentImpl createCompoundPackageContent(
        OleBlobImpl blob, String prettyName, String className, String typeName,
        ByteBuffer blobBb, int dataBlockLen);
  }

  // first short of the outer package header (checked by parseContent)
  private static final int PACKAGE_SIGNATURE = 0x1C15;
  // charset used for the zero terminated strings in the headers
  private static final Charset OLE_CHARSET = Charset.forName("US-ASCII");
  // charset used for the length prefixed strings in the package stream footer
  private static final Charset OLE_UTF_CHARSET = Charset.forName("UTF-16LE");
  // bytes which, when found at the start of the data block, mark compound data
  private static final byte[] COMPOUND_STORAGE_SIGNATURE = {
    (byte) 0xd0, (byte) 0xcf, (byte) 0x11, (byte) 0xe0,
    (byte) 0xa1, (byte) 0xb1, (byte) 0x1a, (byte) 0xe1
  };
  // type name which selects simple package parsing
  private static final String SIMPLE_PACKAGE_TYPE = "Package";
  private static final int PACKAGE_OBJECT_TYPE = 0x02;
  private static final int OLE_VERSION = 0x0501;
  private static final int OLE_FORMAT = 0x02;
  // first short of the package stream header
  private static final int PACKAGE_STREAM_SIGNATURE = 0x02;
  // package stream type markers
  private static final int PS_EMBEDDED_FILE = 0x030000;
  private static final int PS_LINKED_FILE = 0x010000;
  // the only content types which createBlob is able to write
  private static final Set<ContentType> WRITEABLE_TYPES = EnumSet.of(
      ContentType.LINK, ContentType.SIMPLE_PACKAGE, ContentType.OTHER);
  private static final byte[] NO_DATA = new byte[0];
  private static final int LINK_HEADER = 0x01;
  // fixed bytes appended after the payload of every blob written by createBlob
  private static final byte[] PACKAGE_FOOTER = {
    0x01, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x01, (byte) 0xAD, 0x05, (byte) 0xFE
  };

  // regex pattern which matches all the crazy extra stuff in unicode
  private static final Pattern UNICODE_ACCENT_PATTERN = Pattern
      .compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");

  // null when the optional compound support could not be loaded
  private static final CompoundPackageFactory COMPOUND_FACTORY;

  static {
    CompoundPackageFactory compoundFactory = null;
    try {
      compoundFactory = (CompoundPackageFactory) Class
          .forName("com.healthmarketscience.jackcess.impl.CompoundOleUtil")
          .newInstance();
    } catch (Throwable t) {
      // must not have poi, will load compound ole data as "other"
    }
    COMPOUND_FACTORY = compoundFactory;
  }

  /**
   * Parses an access database blob structure and returns an appropriate
   * OleBlob instance.
   *
   * @param bytes the raw blob bytes (the array is retained, not copied)
   * @return a blob wrapping the given bytes; the content is parsed lazily on
   *         the first call to {@code getContent()}
   */
  public static OleBlob parseBlob(byte[] bytes) {
    return new OleBlobImpl(bytes);
  }

  /**
   * Creates a new OleBlob instance using the given information.
   * <p>
   * The stream returned by {@code oleBuilder.getStream()} is always closed
   * before this method returns, whether or not it succeeds.  When the content
   * is supplied as a stream, exactly {@code oleBuilder.getContentLength()}
   * bytes are consumed from it.
   *
   * @param oleBuilder description of the blob to create
   * @return the newly created blob
   * @throws IllegalArgumentException if the builder's type is not writeable
   *         or the resulting value is too large for an ole column
   * @throws IOException if the content stream fails or ends before the
   *         declared content length has been read
   */
  public static OleBlob createBlob(OleBlob.Builder oleBuilder)
      throws IOException {
    try {

      if (!WRITEABLE_TYPES.contains(oleBuilder.getType())) {
        throw new IllegalArgumentException(
            "Cannot currently create ole values of type " + oleBuilder.getType());
      }

      long contentLen = oleBuilder.getContentLength();
      byte[] contentBytes = oleBuilder.getBytes();
      InputStream contentStream = oleBuilder.getStream();
      byte[] packageStreamHeader = NO_DATA;
      byte[] packageStreamFooter = NO_DATA;

      switch (oleBuilder.getType()) {
      case LINK:
        packageStreamHeader = writePackageStreamHeader(oleBuilder);

        // link "content" is file path
        contentBytes = getZeroTermStrBytes(oleBuilder.getFilePath());
        contentLen = contentBytes.length;
        break;

      case SIMPLE_PACKAGE:
        packageStreamHeader = writePackageStreamHeader(oleBuilder);
        packageStreamFooter = writePackageStreamFooter(oleBuilder);
        break;

      case OTHER:
        // nothing more to do
        break;

      default:
        throw new RuntimeException("unexpected type " + oleBuilder.getType());
      }

      long payloadLen = packageStreamHeader.length + packageStreamFooter.length
          + contentLen;
      byte[] packageHeader = writePackageHeader(oleBuilder, payloadLen);

      long totalOleLen = packageHeader.length + PACKAGE_FOOTER.length
          + payloadLen;
      if (totalOleLen > DataType.OLE.getMaxSize()) {
        throw new IllegalArgumentException(
            "Content size of " + totalOleLen + " is too large for ole column");
      }

      byte[] oleBytes = new byte[(int) totalOleLen];
      ByteBuffer bb = PageChannel.wrap(oleBytes);
      bb.put(packageHeader);
      bb.put(packageStreamHeader);

      if (contentLen > 0L) {
        if (contentBytes != null) {
          bb.put(contentBytes);
        } else {
          // copy exactly the declared number of bytes: the buffer was sized
          // from contentLen, so reading to EOF could overflow it, and a
          // short stream would otherwise silently produce a corrupt blob
          byte[] buf = new byte[8192];
          long remaining = contentLen;
          while (remaining > 0L) {
            int numBytes = contentStream.read(
                buf, 0, (int) Math.min(buf.length, remaining));
            if (numBytes < 0) {
              throw new IOException(
                  "Content stream ended " + remaining
                  + " bytes before the declared content length of "
                  + contentLen);
            }
            bb.put(buf, 0, numBytes);
            remaining -= numBytes;
          }
        }
      }

      bb.put(packageStreamFooter);
      bb.put(PACKAGE_FOOTER);

      return parseBlob(oleBytes);

    } finally {
      ByteUtil.closeQuietly(oleBuilder.getStream());
    }
  }

  /**
   * Builds the outer package header followed by the ole header.
   *
   * @param oleBuilder source of the pretty name, class name and type name
   * @param contentLen length written as the last int of the ole header
   * @return the combined header bytes
   */
  private static byte[] writePackageHeader(OleBlob.Builder oleBuilder,
                                           long contentLen) {

    byte[] prettyNameBytes = getZeroTermStrBytes(oleBuilder.getPrettyName());

    // class name and type name default to each other when only one is given
    String className = oleBuilder.getClassName();
    String typeName = oleBuilder.getTypeName();
    if (className == null) {
      className = typeName;
    } else if (typeName == null) {
      typeName = className;
    }

    byte[] classNameBytes = getZeroTermStrBytes(className);
    byte[] typeNameBytes = getZeroTermStrBytes(typeName);

    // 20 = fixed fields of the outer header (6 shorts + 2 ints)
    int packageHeaderLen = 20 + prettyNameBytes.length + classNameBytes.length;

    // 24 = fixed fields of the ole header (4 ints + 1 long)
    int oleHeaderLen = 24 + typeNameBytes.length;

    byte[] headerBytes = new byte[packageHeaderLen + oleHeaderLen];

    ByteBuffer bb = PageChannel.wrap(headerBytes);

    // write outer package header
    bb.putShort((short) PACKAGE_SIGNATURE);
    bb.putShort((short) packageHeaderLen);
    bb.putInt(PACKAGE_OBJECT_TYPE);
    bb.putShort((short) prettyNameBytes.length);
    bb.putShort((short) classNameBytes.length);
    // the names start after the two offset shorts and the int written next
    int prettyNameOff = bb.position() + 8;
    bb.putShort((short) prettyNameOff);
    bb.putShort((short) (prettyNameOff + prettyNameBytes.length));
    bb.putInt(-1);
    bb.put(prettyNameBytes);
    bb.put(classNameBytes);

    // put ole header
    bb.putInt(OLE_VERSION);
    bb.putInt(OLE_FORMAT);
    bb.putInt(typeNameBytes.length);
    bb.put(typeNameBytes);
    bb.putLong(0L);
    bb.putInt((int) contentLen);

    return headerBytes;
  }

  /**
   * Builds the package stream header which precedes the content of link and
   * simple package blobs.
   *
   * @param oleBuilder source of the file name, file path, type and content
   *        length
   * @return the header bytes
   */
  private static byte[] writePackageStreamHeader(OleBlob.Builder oleBuilder) {

    byte[] fileNameBytes = getZeroTermStrBytes(oleBuilder.getFileName());
    byte[] filePathBytes = getZeroTermStrBytes(oleBuilder.getFilePath());

    // 6 = signature short + package type int
    int headerLen = 6 + fileNameBytes.length + filePathBytes.length;

    if (oleBuilder.getType() == ContentType.SIMPLE_PACKAGE) {
      // path length int + second copy of the path + content length int
      headerLen += 8 + filePathBytes.length;
    } else {
      // link header short
      headerLen += 2;
    }

    byte[] headerBytes = new byte[headerLen];
    ByteBuffer bb = PageChannel.wrap(headerBytes);
    bb.putShort((short) PACKAGE_STREAM_SIGNATURE);
    bb.put(fileNameBytes);
    bb.put(filePathBytes);

    if (oleBuilder.getType() == ContentType.SIMPLE_PACKAGE) {
      bb.putInt(PS_EMBEDDED_FILE);
      bb.putInt(filePathBytes.length);
      bb.put(filePathBytes, 0, filePathBytes.length);
      bb.putInt((int) oleBuilder.getContentLength());
    } else {
      bb.putInt(PS_LINKED_FILE);
      bb.putShort((short) LINK_HEADER);
    }

    return headerBytes;
  }

  /**
   * Builds the package stream footer which follows the content of simple
   * package blobs: the file path, the file name and the file path again,
   * each prefixed with its length in chars.
   *
   * @param oleBuilder source of the file name and file path
   * @return the footer bytes
   */
  private static byte[] writePackageStreamFooter(OleBlob.Builder oleBuilder) {

    // note, these are _not_ zero terminated
    byte[] fileNameBytes = oleBuilder.getFileName().getBytes(OLE_UTF_CHARSET);
    byte[] filePathBytes = oleBuilder.getFilePath().getBytes(OLE_UTF_CHARSET);

    // 12 = three length ints; the file path is written twice
    int footerLen = 12 + (filePathBytes.length * 2) + fileNameBytes.length;

    byte[] footerBytes = new byte[footerLen];
    ByteBuffer bb = PageChannel.wrap(footerBytes);

    // lengths are written in chars (two bytes each), not bytes
    bb.putInt(filePathBytes.length / 2);
    bb.put(filePathBytes);
    bb.putInt(fileNameBytes.length / 2);
    bb.put(fileNameBytes);
    bb.putInt(filePathBytes.length / 2);
    bb.put(filePathBytes);

    return footerBytes;
  }

  /**
   * creates the appropriate ContentImpl for the given blob.
   *
   * @param blob the blob whose bytes should be parsed
   * @return unknown content if the package signature or ole version does not
   *         match, otherwise simple package, link, compound or other content
   * @throws IOException if the blob has been closed
   */
  private static ContentImpl parseContent(OleBlobImpl blob)
      throws IOException {

    ByteBuffer bb = PageChannel.wrap(blob.getBytes());

    if ((bb.remaining() < 2) || (bb.getShort() != PACKAGE_SIGNATURE)) {
      return new UnknownContentImpl(blob);
    }

    // read outer package header
    int headerSize = bb.getShort();
    bb.getInt(); // object type (unused)
    int prettyNameLen = bb.getShort();
    int classNameLen = bb.getShort();
    int prettyNameOff = bb.getShort();
    int classNameOff = bb.getShort();
    bb.getInt(); // object size (unused)
    String prettyName = readStr(bb, prettyNameOff, prettyNameLen);
    String className = readStr(bb, classNameOff, classNameLen);
    bb.position(headerSize);

    // read ole header
    int oleVer = bb.getInt();
    bb.getInt(); // format (unused)

    if (oleVer != OLE_VERSION) {
      return new UnknownContentImpl(blob);
    }

    int typeNameLen = bb.getInt();
    String typeName = readStr(bb, bb.position(), typeNameLen);
    bb.getLong(); // unused
    int dataBlockLen = bb.getInt();
    int dataBlockPos = bb.position();

    if (SIMPLE_PACKAGE_TYPE.equalsIgnoreCase(typeName)) {
      return createSimplePackageContent(
          blob, prettyName, className, typeName, bb, dataBlockLen);
    }

    // if COMPOUND_FACTORY is null, the poi library isn't available, so just
    // load compound data as "other"
    if ((COMPOUND_FACTORY != null)
        && (bb.remaining() >= COMPOUND_STORAGE_SIGNATURE.length)
        && ByteUtil.matchesRange(bb, bb.position(),
                                 COMPOUND_STORAGE_SIGNATURE)) {
      return COMPOUND_FACTORY.createCompoundPackageContent(
          blob, prettyName, className, typeName, bb, dataBlockLen);
    }

    // this is either some other "special" (as yet unhandled) format, or it is
    // simply an embedded file (or it is compound data and poi isn't available)
    return new OtherContentImpl(blob, prettyName, className, typeName,
                                dataBlockPos, dataBlockLen);
  }

  /**
   * Parses the data block of a blob whose type name is "Package".
   *
   * @param blobBb buffer over the blob bytes, positioned at the start of the
   *        data block
   * @param dataBlockLen length of the data block
   * @return simple package content for an embedded file, link content for a
   *         linked file, otherwise "other" content covering the data block
   */
  private static ContentImpl createSimplePackageContent(
      OleBlobImpl blob, String prettyName, String className, String typeName,
      ByteBuffer blobBb, int dataBlockLen) {

    int dataBlockPos = blobBb.position();
    ByteBuffer bb = PageChannel.narrowBuffer(blobBb, dataBlockPos,
                                             dataBlockPos + dataBlockLen);

    int packageSig = bb.getShort();
    if (packageSig != PACKAGE_STREAM_SIGNATURE) {
      return new OtherContentImpl(blob, prettyName, className, typeName,
                                  dataBlockPos, dataBlockLen);
    }

    String fileName = readZeroTermStr(bb);
    String filePath = readZeroTermStr(bb);
    int packageType = bb.getInt();

    if (packageType == PS_EMBEDDED_FILE) {

      int localFilePathLen = bb.getInt();
      String localFilePath = readStr(bb, bb.position(), localFilePathLen);
      int dataLen = bb.getInt();
      int dataPos = bb.position();
      // skip over the embedded file data
      bb.position(dataLen + dataPos);

      // remaining strings are in "reverse" order (local file path, file name,
      // file path).  these strings use a real utf charset, and therefore can
      // "fix" problems with ascii based names (so we prefer these strings to
      // the original strings we found)
      int strNum = 0;
      while (true) {

        int rem = bb.remaining();
        if (rem < 4) {
          break;
        }

        // length is in chars, each of which takes two bytes
        int strLen = bb.getInt();
        String remStr = readStr(bb, bb.position(), strLen * 2,
                                OLE_UTF_CHARSET);

        switch (strNum) {
        case 0:
          localFilePath = remStr;
          break;
        case 1:
          fileName = remStr;
          break;
        case 2:
          filePath = remStr;
          break;
        default:
          // ignore
        }

        ++strNum;
      }

      return new SimplePackageContentImpl(
          blob, prettyName, className, typeName, dataPos, dataLen,
          fileName, filePath, localFilePath);
    }

    if (packageType == PS_LINKED_FILE) {

      bb.getShort(); //unknown
      String linkStr = readZeroTermStr(bb);

      return new LinkContentImpl(blob, prettyName, className, typeName,
                                 fileName, linkStr, filePath);
    }

    return new OtherContentImpl(blob, prettyName, className, typeName,
                                dataBlockPos, dataBlockLen);
  }

  /**
   * Reads a string of the given byte length using the ascii charset, see
   * {@link #readStr(ByteBuffer,int,int,Charset)}.
   */
  private static String readStr(ByteBuffer bb, int off, int len) {
    return readStr(bb, off, len, OLE_CHARSET);
  }

  /**
   * Reads an ascii string which starts at the current buffer position and
   * runs through the next zero byte (or the end of the buffer if there is
   * none).  The buffer is left positioned after the bytes consumed.
   */
  private static String readZeroTermStr(ByteBuffer bb) {
    int off = bb.position();
    while (bb.hasRemaining()) {
      byte b = bb.get();
      if (b == 0) {
        break;
      }
    }
    int len = bb.position() - off;
    return readStr(bb, off, len);
  }

  /**
   * Decodes {@code len} bytes of the buffer's backing array starting at
   * {@code off}, moves the buffer position to {@code off + len} and strips a
   * single trailing NUL char if one is present.
   *
   * @return the decoded string, which is empty when {@code len} is 0
   */
  private static String readStr(ByteBuffer bb, int off, int len,
                                Charset charset) {
    String str = new String(bb.array(), off, len, charset);
    bb.position(off + len);
    // a zero length string has no last char to inspect
    if (!str.isEmpty() && (str.charAt(str.length() - 1) == '\0')) {
      str = str.substring(0, str.length() - 1);
    }
    return str;
  }

  /**
   * Converts the given string to zero terminated ascii bytes after stripping
   * accents and similar marks.
   */
  private static byte[] getZeroTermStrBytes(String str) {
    // since we are converting to ascii, try to make "nicer" versions of crazy
    // chars (e.g. convert "u with an umlaut" to just "u").  this may not
    // ultimately help anything but it is what ms access does.

    // decompose complex chars into combos of char and accent
    str = Normalizer.normalize(str, Normalizer.Form.NFD);
    // strip the accents
    str = UNICODE_ACCENT_PATTERN.matcher(str).replaceAll("");
    // (re)normalize what is left
    str = Normalizer.normalize(str, Normalizer.Form.NFC);

    return (str + '\0').getBytes(OLE_CHARSET);
  }


  /**
   * OleBlob implementation backed by a byte array.  The content is parsed on
   * first access.  The modification methods of the jdbc Blob interface are
   * not supported.
   */
  static final class OleBlobImpl implements OleBlob {
    // null once the blob has been closed
    private byte[] _bytes;
    // lazily parsed content, null until requested (and again after close)
    private ContentImpl _content;

    private OleBlobImpl(byte[] bytes) {
      _bytes = bytes;
    }

    /**
     * Writes all the blob bytes to the given stream.
     *
     * @throws IOException if the blob is closed or the write fails
     */
    public void writeTo(OutputStream out) throws IOException {
      out.write(getBytes());
    }

    /**
     * Returns the parsed content of this blob, parsing it on the first call.
     */
    public Content getContent() throws IOException {
      if (_content == null) {
        _content = parseContent(this);
      }
      return _content;
    }

    public InputStream getBinaryStream() throws SQLException {
      return new ByteArrayInputStream(getOpenBytes());
    }

    public InputStream getBinaryStream(long pos, long len)
        throws SQLException {
      return new ByteArrayInputStream(getOpenBytes(), fromJdbcOffset(pos),
                                      (int) len);
    }

    public long length() throws SQLException {
      return getOpenBytes().length;
    }

    /**
     * Returns the backing byte array itself (not a copy).
     *
     * @throws IOException if the blob is closed
     */
    public byte[] getBytes() throws IOException {
      if (_bytes == null) {
        throw new IOException("blob is closed");
      }
      return _bytes;
    }

    public byte[] getBytes(long pos, int len) throws SQLException {
      return ByteUtil.copyOf(getOpenBytes(), fromJdbcOffset(pos), len);
    }

    public long position(byte[] pattern, long start) throws SQLException {
      int pos = ByteUtil.findRange(PageChannel.wrap(getOpenBytes()),
                                   fromJdbcOffset(start), pattern);
      // negative results are passed through unchanged
      return ((pos >= 0) ? toJdbcOffset(pos) : pos);
    }

    public long position(Blob pattern, long start) throws SQLException {
      return position(pattern.getBytes(1L, (int) pattern.length()), start);
    }

    public OutputStream setBinaryStream(long position) throws SQLException {
      throw new SQLFeatureNotSupportedException();
    }

    public void truncate(long len) throws SQLException {
      throw new SQLFeatureNotSupportedException();
    }

    public int setBytes(long pos, byte[] bytes) throws SQLException {
      throw new SQLFeatureNotSupportedException();
    }

    public int setBytes(long pos, byte[] bytes, int offset, int len)
        throws SQLException {
      throw new SQLFeatureNotSupportedException();
    }

    public void free() {
      close();
    }

    /**
     * Releases the bytes and closes any parsed content.  Calling this more
     * than once has no further effect.
     */
    public void close() {
      _bytes = null;
      ByteUtil.closeQuietly(_content);
      _content = null;
    }

    /**
     * Returns the backing bytes for the jdbc accessors, which report a
     * closed blob as an SQLException rather than a NullPointerException.
     */
    private byte[] getOpenBytes() throws SQLException {
      if (_bytes == null) {
        throw new SQLException("blob is closed");
      }
      return _bytes;
    }

    // jdbc offsets are 1 based, array offsets are 0 based
    private static int toJdbcOffset(int off) {
      return off + 1;
    }

    private static int fromJdbcOffset(long off) {
      return (int) off - 1;
    }

    @Override
    public String toString() {
      ToStringBuilder sb = CustomToStringStyle.builder(this);
      if (_content != null) {
        sb.append("content", _content);
      } else {
        sb.append("bytes", _bytes);
        sb.append("content", "(uninitialized)");
      }
      return sb.toString();
    }
  }

  /**
   * Base class for all content implementations, holds the owning blob.
   */
  static abstract class ContentImpl implements Content, Closeable {
    protected final OleBlobImpl _blob;

    protected ContentImpl(OleBlobImpl blob) {
      _blob = blob;
    }

    public OleBlobImpl getBlob() {
      return _blob;
    }

    /**
     * Returns the bytes of the owning blob.
     *
     * @throws IOException if the blob is closed
     */
    protected byte[] getBytes() throws IOException {
      return getBlob().getBytes();
    }

    public void close() {
      // base does nothing
    }

    protected ToStringBuilder toString(ToStringBuilder sb) {
      sb.append("type", getType());
      return sb;
    }
  }

  /**
   * Content which is a range of the owning blob's bytes.
   */
  static abstract class EmbeddedContentImpl extends ContentImpl
      implements EmbeddedContent {
    private final int _position;
    private final int _length;

    protected EmbeddedContentImpl(OleBlobImpl blob, int position, int length) {
      super(blob);
      _position = position;
      _length = length;
    }

    public long length() {
      return _length;
    }

    public InputStream getStream() throws IOException {
      return new ByteArrayInputStream(getBytes(), _position, _length);
    }

    public void writeTo(OutputStream out) throws IOException {
      out.write(getBytes(), _position, _length);
    }

    @Override
    protected ToStringBuilder toString(ToStringBuilder sb) {
      super.toString(sb);
      // skip the bytes when there is no valid range (link content passes -1)
      // or when the blob has been closed, so that toString never throws
      byte[] bytes = _blob._bytes;
      if ((_position >= 0) && (bytes != null)) {
        sb.append("content", ByteBuffer.wrap(bytes, _position, _length));
      }
      return sb;
    }
  }

  /**
   * Embedded content which also carries the names read from the package and
   * ole headers.
   */
  static abstract class EmbeddedPackageContentImpl extends EmbeddedContentImpl
      implements PackageContent {
    private final String _prettyName;
    private final String _className;
    private final String _typeName;

    protected EmbeddedPackageContentImpl(
        OleBlobImpl blob, String prettyName, String className,
        String typeName, int position, int length) {
      super(blob, position, length);
      _prettyName = prettyName;
      _className = className;
      _typeName = typeName;
    }

    public String getPrettyName() {
      return _prettyName;
    }

    public String getClassName() {
      return _className;
    }

    public String getTypeName() {
      return _typeName;
    }

    @Override
    protected ToStringBuilder toString(ToStringBuilder sb) {
      sb.append("prettyName", _prettyName)
        .append("className", _className)
        .append("typeName", _typeName);
      super.toString(sb);
      return sb;
    }
  }

  /**
   * Content which refers to a file by path.  It is constructed with a
   * position and length of -1, i.e. without a range of the blob's bytes.
   */
  private static final class LinkContentImpl
      extends EmbeddedPackageContentImpl implements LinkContent {
    private final String _fileName;
    private final String _linkPath;
    private final String _filePath;

    private LinkContentImpl(OleBlobImpl blob, String prettyName,
                            String className, String typeName,
                            String fileName, String linkPath,
                            String filePath) {
      super(blob, prettyName, className, typeName, -1, -1);
      _fileName = fileName;
      _linkPath = linkPath;
      _filePath = filePath;
    }

    public ContentType getType() {
      return ContentType.LINK;
    }

    public String getFileName() {
      return _fileName;
    }

    public String getLinkPath() {
      return _linkPath;
    }

    public String getFilePath() {
      return _filePath;
    }

    /**
     * Opens the file named by the link path.  The caller is responsible for
     * closing the returned stream.
     */
    public InputStream getLinkStream() throws IOException {
      return new FileInputStream(getLinkPath());
    }

    @Override
    public String toString() {
      return toString(CustomToStringStyle.builder(this))
        .append("fileName", _fileName)
        .append("linkPath", _linkPath)
        .append("filePath", _filePath)
        .toString();
    }
  }

  /**
   * Content for a file embedded in a simple package.
   */
  private static final class SimplePackageContentImpl
      extends EmbeddedPackageContentImpl implements SimplePackageContent {
    private final String _fileName;
    private final String _filePath;
    private final String _localFilePath;

    private SimplePackageContentImpl(OleBlobImpl blob, String prettyName,
                                     String className, String typeName,
                                     int position, int length,
                                     String fileName, String filePath,
                                     String localFilePath) {
      super(blob, prettyName, className, typeName, position, length);
      _fileName = fileName;
      _filePath = filePath;
      _localFilePath = localFilePath;
    }

    public ContentType getType() {
      return ContentType.SIMPLE_PACKAGE;
    }

    public String getFileName() {
      return _fileName;
    }

    public String getFilePath() {
      return _filePath;
    }

    public String getLocalFilePath() {
      return _localFilePath;
    }

    @Override
    public String toString() {
      return toString(CustomToStringStyle.builder(this))
        .append("fileName", _fileName)
        .append("filePath", _filePath)
        .append("localFilePath", _localFilePath)
        .toString();
    }
  }

  /**
   * Content for package data which has no more specific handling.
   */
  private static final class OtherContentImpl
      extends EmbeddedPackageContentImpl implements OtherContent {
    private OtherContentImpl(
        OleBlobImpl blob, String prettyName, String className,
        String typeName, int position, int length) {
      super(blob, prettyName, className, typeName, position, length);
    }

    public ContentType getType() {
      return ContentType.OTHER;
    }

    @Override
    public String toString() {
      return toString(CustomToStringStyle.builder(this))
        .toString();
    }
  }

  /**
   * Content for blobs which do not start with the expected package signature
   * or ole version.
   */
  private static final class UnknownContentImpl extends ContentImpl {
    private UnknownContentImpl(OleBlobImpl blob) {
      super(blob);
    }

    public ContentType getType() {
      return ContentType.UNKNOWN;
    }

    @Override
    public String toString() {
      return toString(CustomToStringStyle.builder(this))
        .append("content", _blob._bytes)
        .toString();
    }
  }

}