// Java tutorial
/*
 * @(#)$Id:BasicStringChunk.java 2335 2007-07-17 04:14:15Z yui $
 *
 * Copyright 2006-2008 Makoto YUI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Makoto YUI - initial implementation
 */
package xbird.xquery.misc;

import java.io.File;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import xbird.storage.DbCollection;
import xbird.storage.io.Segments;
import xbird.storage.io.VarSegments;
import xbird.storage.io.VarSegments.DescriptorType;
import xbird.util.collections.ints.Int2IntHash.Int2IntLRUMap;
import xbird.util.compress.CompressionCodec;
import xbird.util.compress.CompressorFactory;
import xbird.util.hashes.HashUtils;
import xbird.util.lang.ArrayUtils;
import xbird.util.primitive.Primitives;
import xbird.util.resource.PropertyMap;
import xbird.util.string.StringUtils;

/**
 * A string pool that stores short strings packed into fixed-size {@code char[]} pages
 * and long ("big") strings in a separate {@link List}.
 *
 * <p>Address encoding: the low bit of an address discriminates the two stores
 * ({@code 1} = big-string pool index, {@code 0} = packed chunk offset); the remaining
 * bits (address {@code >>> 1}) are the index/offset. Within the packed store, an offset
 * splits into a page number ({@code offset >> BLOCK_SHIFT}) and a block offset
 * ({@code offset & BLOCK_MASK}); the first char of each entry holds the string length.
 *
 * <p>NOTE(review): this class is NOT thread-safe — {@link #store(String)} writes into a
 * shared {@code tmpBuf}; callers must synchronize externally if shared across threads.
 *
 * <DIV lang="en"></DIV>
 * <DIV lang="ja"></DIV>
 *
 * @author Makoto YUI (yuin405 AT gmail.com)
 */
public final class BasicStringChunk implements IStringChunk {
    private static final long serialVersionUID = -2802997860361606643L;
    // BUGFIX: was LogFactory.getLog(StringChunk.class), which mis-attributed
    // this class's log output to StringChunk.
    private static final Log LOG = LogFactory.getLog(BasicStringChunk.class);

    private static final int DEFAULT_PAGES = 8;
    private static final float ENLARGE_PAGES_FACTOR = 1.4f;
    private static final int BLOCK_SHIFT = 12;
    private static final int BLOCK_SIZE = 1 << BLOCK_SHIFT; // 2^12 = 4096 = 4KB
    private static final long BLOCK_MASK = BLOCK_SIZE - 1L;
    // Caution: CHUNKED_THRESHOLD must be less than Character.MAX_VALUE (2^16=65536),
    // because an entry's length is stored in a single leading char.
    private static final int CHUNKED_THRESHOLD = BLOCK_SIZE / 8; // 512 byte
    private static final int MAX_CHUNK_POINTER = (1 << 31) - 1;
    private static final int DEFAULT_BIG_STRINGS_SIZE = (int) (DEFAULT_PAGES * ENLARGE_PAGES_FACTOR * 4);
    /** Low bit set on an address marks a big-string pool reference. */
    private static final long BIG_STRING_MASK = 1L;

    //--------------------------------------------
    // transient stuff

    /** Scratch buffer reused by {@link #store(String)}; makes this class non-thread-safe. */
    private final char[] tmpBuf = new char[CHUNKED_THRESHOLD];
    /** map for duplicate value management. key: chunk hash value, value: address */
    private transient final Int2IntLRUMap _hashv2addr = new Int2IntLRUMap(1024);

    //--------------------------------------------
    // persistent stuff

    /** Pool for strings of length >= CHUNKED_THRESHOLD. */
    private List<String> _strPool;
    /** Pages of packed short strings; each entry is [length-char, chars...]. */
    private char[][] _cchunks;
    /** Next free offset in the packed chunk store. */
    private long _cpointer = 0;
    private final CompressionCodec compressor;

    //--------------------------------------------

    public BasicStringChunk() {
        this._strPool = new ArrayList<String>(DEFAULT_BIG_STRINGS_SIZE);
        this._cchunks = new char[DEFAULT_PAGES][];
        this._cchunks[0] = new char[BLOCK_SIZE];
        this.compressor = CompressorFactory.createCodec();
    }

    //--------------------------------------------

    /**
     * Appends the string stored at {@code addr} to {@code sb}.
     */
    public void get(final long addr, final StringBuilder sb) {
        final long ptr = indexOf(addr);
        if ((addr & BIG_STRING_MASK) != 0) { // is big string
            assert (ptr <= 0x7fffffffL) : ptr;
            final String s = _strPool.get((int) ptr);
            sb.append(s);
        } else {
            // BUGFIX: was `(int) ptr >> BLOCK_SHIFT` — the cast binds tighter than the
            // shift, truncating ptr to 32 bits before shifting and corrupting the page
            // number for large pointers. Shift first, as getStoredChars() does.
            final long lpage = ptr >> BLOCK_SHIFT;
            if (lpage > 0x7fffffffL) {
                throw new IllegalStateException("Illegal page number: " + lpage);
            }
            final int page = (int) lpage;
            final int block = (int) (ptr & BLOCK_MASK);
            final char[] cc = _cchunks[page];
            final int length = cc[block]; // the first char is the length of the string
            sb.append(cc, block + 1, length);
        }
    }

    /**
     * Returns the string stored at {@code addr}.
     */
    public String getString(final long addr) {
        final long ptr = indexOf(addr);
        final String ret;
        if ((addr & BIG_STRING_MASK) != 0L) { // is big string
            ret = _strPool.get((int) ptr);
        } else {
            // BUGFIX: shift before casting (see get()); the old `(int) ptr >> BLOCK_SHIFT`
            // truncated the pointer prior to extracting the page number.
            final long lpage = ptr >> BLOCK_SHIFT;
            if (lpage > 0x7fffffffL) {
                throw new IllegalStateException("Illegal page number: " + lpage);
            }
            final int page = (int) lpage;
            final int block = (int) (ptr & BLOCK_MASK);
            final char[] cp = _cchunks[page];
            final int length = cp[block]; // the first char is the length of the string
            ret = new String(cp, block + 1, length);
        }
        return ret;
    }

    public long getBufferAddress(long addr) {
        throw new UnsupportedOperationException();
    }

    /**
     * Stores {@code length} chars of {@code ch} starting at {@code start} and returns
     * the encoded address. Strings at or above {@link #CHUNKED_THRESHOLD} go to the
     * big-string pool; shorter ones are packed (and deduplicated) into chunk pages.
     */
    public long store(final char[] ch, final int start, final int length) {
        final int raddr;
        if (length >= CHUNKED_THRESHOLD) {
            raddr = allocateStringChunk(new String(ch, start, length));
        } else {
            raddr = storeCharChunk(ch, start, length);
        }
        return raddr;
    }

    /**
     * Stores {@code s} and returns the encoded address.
     * Uses the shared {@code tmpBuf} for short strings — not thread-safe.
     */
    public long store(final String s) {
        assert (s != null);
        final int strlen = s.length();
        if (strlen < CHUNKED_THRESHOLD) {
            s.getChars(0, strlen, tmpBuf, 0);
            return store(tmpBuf, 0, strlen);
        }
        return allocateStringChunk(s);
    }

    /** Appends {@code s} to the big-string pool and returns its encoded (odd) address. */
    private int allocateStringChunk(final String s) {
        final int index = _strPool.size();
        _strPool.add(s);
        return stringKey(index);
    }

    /** Encodes a big-string pool index: shifted left with the low discriminator bit set. */
    private static int stringKey(final int index) {
        return (index << 1) + 1;
    }

    /**
     * Stores a short string in the packed chunk store, reusing an existing entry when
     * the LRU hash map finds an identical previously-stored string.
     */
    private int storeCharChunk(final char[] ch, final int start, final int length) {
        final int hcode = HashUtils.hashCode(ch, start, length);
        final int haddr = _hashv2addr.get(hcode);
        if (haddr != -1) {
            final char[] strInAddr = getStoredChars(haddr);
            assert (strInAddr != null);
            if (ArrayUtils.equals(strInAddr, ch, start, length)) {
                return haddr; // dedup hit: identical string already stored
            } else {
                _hashv2addr.remove(hcode); // hash collision with a different string
            }
        }
        final int raddr = allocateCharChunk(ch, start, length);
        _hashv2addr.put(hcode, raddr);
        return raddr;
    }

    /** Copies out the chars of a packed entry; used only for dedup verification. */
    @Deprecated
    private char[] getStoredChars(final int addr) {
        final long ptr = indexOf(addr);
        final long lp = ptr >> BLOCK_SHIFT;
        if (lp > 0x7fffffffL) {
            throw new IllegalStateException("Illegal page number: " + lp);
        }
        final int page = (int) lp;
        final char[] cc = _cchunks[page];
        final int block = (int) (ptr & BLOCK_MASK);
        final int length = cc[block]; // the first char is the length of the string
        final int from = block + 1;
        return ArrayUtils.copyOfRange(cc, from, from + length);
    }

    /**
     * Appends [length, chars...] to the packed store, growing/allocating pages as needed.
     * Entries never span a page boundary.
     */
    private int allocateCharChunk(final char[] src, final int start, final int length) {
        assert (length < CHUNKED_THRESHOLD) : length;
        final int reqlen = length + 1; // actual length is stored in the first char
        final long lpage;
        if (((_cpointer & BLOCK_MASK) + reqlen) > BLOCK_SIZE) {
            // spanning pages is not allowed, allocate in next chunk.
            lpage = (_cpointer >> BLOCK_SHIFT) + 1;
            if (lpage > Integer.MAX_VALUE) {
                throw new IllegalStateException("Assigned page exceeds system's limit: " + lpage);
            }
            this._cpointer = lpage * BLOCK_SIZE;
        } else {
            lpage = _cpointer >> BLOCK_SHIFT;
        }
        final int page = (int) lpage;
        if (page >= _cchunks.length) {
            enlarge((int) (_cchunks.length * ENLARGE_PAGES_FACTOR));
        }
        if (_cchunks[page] == null) {
            _cchunks[page] = new char[BLOCK_SIZE];
        }
        final long lblock = _cpointer & BLOCK_MASK;
        if (lblock > Integer.MAX_VALUE) {
            throw new IllegalStateException("Assigned block exceeds system's limit: " + lblock);
        }
        final int block = (int) lblock;
        assert (length < Character.MAX_VALUE) : length;
        _cchunks[page][block] = (char) length;
        System.arraycopy(src, start, _cchunks[page], block + 1, length);
        final long index = _cpointer;
        _cpointer += reqlen; // move ahead pointer
        return chunkKey(index);
    }

    /** Encodes a packed-store offset: shifted left, low discriminator bit clear. */
    private static int chunkKey(final long index) {
        if (index > MAX_CHUNK_POINTER) {
            throw new IllegalStateException("Assigned key '" + index
                    + "' exceeds system's limit '" + MAX_CHUNK_POINTER + '\'');
        }
        return (int) index << 1;
    }

    /** Strips the discriminator bit, yielding the pool index or packed-store offset. */
    private static long indexOf(final long addr) {
        return addr >>> 1;
    }

    /** Grows the page table to {@code pages} entries, preserving existing pages. */
    private void enlarge(final int pages) {
        final char[][] newchunks = new char[pages][];
        System.arraycopy(_cchunks, 0, newchunks, 0, _cchunks.length);
        this._cchunks = newchunks;
    }

    /**
     * Persists both stores (compressed) to the collection's chunk file, records the
     * written counts in {@code docProps}, and closes this chunk.
     *
     * @throws IOException if writing the segment file fails
     */
    public void flush(final DbCollection coll, final String docName, final PropertyMap docProps)
            throws IOException {
        final File chunkFile = getChunkFile(coll, docName);
        assert (!chunkFile.exists()) : "file already exists: " + chunkFile.getAbsolutePath();
        final int splen = _strPool.size();
        final Segments paged = new VarSegments(chunkFile, DescriptorType.hash);
        for (int i = 0; i < splen; i++) { // big string
            final String s = _strPool.get(i);
            final byte[] b = compressor.compress(StringUtils.getBytes(s));
            final int addr = stringKey(i);
            paged.write(addr, b);
        }
        _strPool.clear();
        final long lcclen = _cpointer >> BLOCK_SHIFT;
        assert (lcclen <= Integer.MAX_VALUE) : lcclen;
        final int cclen = Math.min((int) lcclen, _cchunks.length - 1);
        for (int i = 0; i <= cclen; i++) {
            final char[] c = _cchunks[i];
            assert (c != null);
            final byte[] b = compress(compressor, c);
            final int addr = chunkKey(i * BLOCK_SIZE);
            paged.write(addr, b);
            _cchunks[i] = null; // release the page once written
        }
        docProps.setProperty(KEY_STRPOOL_WRITTEN, String.valueOf(splen));
        docProps.setProperty(KEY_CHUNK_WRITTEN, String.valueOf(cclen));
        paged.flush(false);
        close();
        LOG.info("write string chunk file:" + chunkFile.getAbsolutePath());
    }

    /** Resolves the on-disk chunk file for {@code docName} within the collection. */
    private static File getChunkFile(final DbCollection coll, final String docName) {
        final File baseDir = new File(coll.getAbsolutePath());
        assert (baseDir.exists() && baseDir.isDirectory());
        final File chunkFile = new File(baseDir, docName + STRING_CHUNK_FILE_SUFFIX);
        return chunkFile;
    }

    /** Serializes a page to bytes and compresses it with the given codec. */
    private byte[] compress(final CompressionCodec compressor, final char[] c) {
        final byte[] b = Primitives.toBytes(c);
        return compressor.compress(b);
    }

    @SuppressWarnings("unchecked") // readObject() cannot carry generic type information
    public void readExternal(final ObjectInput in) throws IOException, ClassNotFoundException {
        this._strPool = (List<String>) in.readObject();
        this._cchunks = (char[][]) in.readObject();
        this._cpointer = in.readLong();
    }

    public void writeExternal(final ObjectOutput out) throws IOException {
        out.writeObject(_strPool);
        out.writeObject(_cchunks);
        out.writeLong(_cpointer);
    }

    public void close() throws IOException {
        this._strPool = null;
        this._cchunks = null;
        _hashv2addr.close();
    }

    public int getAndIncrementReferenceCount() {
        return 1; // dummy
    }
}