Java tutorial
/* * Copyright 2003-2016 MarkLogic Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.marklogic.tree; import java.io.ByteArrayInputStream; import java.io.DataInput; import java.io.IOException; import java.nio.charset.Charset; import java.util.zip.Inflater; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.marklogic.dom.NodeImpl; import com.marklogic.io.BiendianDataInputStream; import com.marklogic.io.Decoder; /** * Decoder of Compressed Tree. * * @author jchen */ public class CompressedTreeDecoder { public static final Log LOG = LogFactory.getLog(CompressedTreeDecoder.class); private static final Charset UTF8 = Charset.forName("UTF8"); private static final byte xmlURIBytes[] = "http://www.w3.org/XML/1998/namespace".getBytes(UTF8); private static final byte xsiURIBytes[] = "http://www.w3.org/2001/XMLSchema-instance".getBytes(UTF8); private static final byte spaceBytes[] = "space".getBytes(UTF8); private static final byte langBytes[] = "lang".getBytes(UTF8); private static final byte baseBytes[] = "base".getBytes(UTF8); private static final byte typeBytes[] = "type".getBytes(UTF8); static final int MAX_BINARY_BYTES = 512 << 20; // 512 MB private static final int xmlSpaceAttrPresentFlag = 0x01; private static final int xmlLangAttrPresentFlag = 0x02; private static final int xmlBaseAttrPresentFlag = 0x04; private static final int xsiTypeAttrPresentFlag = 0x08; public String utf8(String s) { byte b[] = s.getBytes(UTF8); StringBuilder buf = new StringBuilder(); for (int i = 0; i < b.length; ++i) { buf.append(String.format("%02x", b[i] & 0xff)); } return buf.toString(); } private void decodeText(ExpandedTree rep, Decoder decoder, int atomLimit) throws IOException { if (atomLimit == 0) return; int numAtoms = decoder.decodeUnsigned(); int index = rep.numTextReps; int minSize = rep.numTextReps + numAtoms + 1; if (rep.textReps == null) { int size = Math.max(rep.atomLimit * 16, minSize); rep.textReps = new int[size]; } else if (rep.textReps.length < minSize) { int size = Math.max(rep.textReps.length * 2, minSize); int textReps[] = new int[size]; System.arraycopy(rep.textReps, 0, textReps, 0, index); rep.textReps = textReps; } rep.textReps[index++] = numAtoms; rep.numTextReps += numAtoms + 1; for (int j = 0; j < numAtoms; j++) { int atom = decoder.decodeUnsigned(); assert (atom < atomLimit); rep.textReps[index++] = atom; } } private void addText(ExpandedTree rep, int numKeys) throws IOException { if (numKeys == 0) return; int index = rep.numTextReps; int minSize = rep.numTextReps + numKeys + 1; if (rep.textReps == null) { int size = Math.max(rep.atomLimit * 16, minSize); rep.textReps = new int[size]; } else if (rep.textReps.length < minSize) { int size = Math.max(rep.textReps.length * 2, minSize); int textReps[] = new int[size]; System.arraycopy(rep.textReps, 0, textReps, 0, index); rep.textReps = textReps; } } private int pow2ceil(int x) { int y = 8; while (y < x) y <<= 1; return y; } public ExpandedTree decode(byte[] buf, int len) throws IOException { String bad; ByteArrayInputStream bis = new ByteArrayInputStream(buf); BiendianDataInputStream is = new BiendianDataInputStream(bis); Decoder decoder = new Decoder(is); ExpandedTree rep = new ExpandedTree(); rep.uriKey = decoder.decode64bits(); rep.uniqKey = decoder.decode64bits(); rep.linkKey = decoder.decode64bits(); if (rep.linkKey == -1) { rep.linkKey = decoder.decode64bits(); Inflater decompresser = new Inflater(); decompresser.setInput(buf, 32, len - 32); int resultLength = 0; int offset = 0; int buflen = Math.min((pow2ceil(len) * 2), (1 << 28)); byte[] result = new byte[buflen]; try { for (;;) { resultLength += decompresser.inflate(result, offset, buflen - offset); if (decompresser.finished()) { break; } offset = buflen; buflen += Math.min(buflen, (1 << 28)); result = java.util.Arrays.copyOf(result, buflen); } } catch (java.util.zip.DataFormatException ex) { throw new IOException("zip inflate failed"); } bis = new ByteArrayInputStream(result, 0, resultLength); is = new BiendianDataInputStream(bis); decoder = new Decoder(is); } rep.numKeys = decoder.decodeUnsigned(); if (rep.numKeys == 0) rep.keys = null; else { rep.keys = new long[rep.numKeys]; for (int i = 0; i < rep.numKeys; ++i) { rep.keys[i] = decoder.decode64bits(); } } if (LOG.isTraceEnabled()) { LOG.trace(String.format("uriKey %016x", rep.uriKey)); LOG.trace(String.format("uniqKey %016x", rep.uniqKey)); LOG.trace(String.format("linkKey %016x", rep.linkKey)); for (int i = 0; i < rep.numKeys; ++i) { LOG.trace(String.format(" key[%d] %016x", i, rep.keys[i])); } } // atoms int numAtomDataWords = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numAtomDataWords %d", numAtomDataWords)); if (numAtomDataWords == 0) rep.atomData = null; else { rep.atomData = new byte[numAtomDataWords * 4]; for (int i = 0, j = 0; i < numAtomDataWords; ++i) { int word = decoder.decode32bits(); rep.atomData[j++] = (byte) (word & 0xff); rep.atomData[j++] = (byte) ((word >> 8) & 0xff); rep.atomData[j++] = (byte) ((word >> 16) & 0xff); rep.atomData[j++] = (byte) ((word >> 24) & 0xff); if (LOG.isTraceEnabled()) { LOG.trace(String.format(" atomData[%d] %08x", i, word)); LOG.trace(String.format(" atomData[%d] %02x %02x %02x %02x", i, rep.atomData[i * 4], rep.atomData[i * 4 + 1], rep.atomData[i * 4 + 2], rep.atomData[i * 4 + 3])); } } } rep.atomLimit = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) { LOG.trace(String.format("atomLimit %d", rep.atomLimit)); } if (rep.atomLimit == 0) { rep.atomIndex = null; } else { rep.atomIndex = new int[rep.atomLimit + 1]; int j = 0; for (int i = 0; i < rep.atomLimit; ++i) { rep.atomIndex[i] = j; if (LOG.isTraceEnabled()) LOG.trace(String.format(" atomIndex[%d] %08x", i, rep.atomIndex[i])); if (rep.atomData != null) while (rep.atomData[j++] != 0) ; } rep.atomIndex[rep.atomLimit] = j; } for (int i = 0; i < rep.atomLimit; ++i) { if (LOG.isTraceEnabled()) LOG.trace(String.format(" atomString[%d] %s", i, rep.atomString(i))); } // node names int numNodeNameReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numNodeNameReps %d", numNodeNameReps)); if (numNodeNameReps == 0) { rep.nodeNameNameAtom = null; rep.nodeNameNamespaceAtom = null; } else { rep.nodeNameNameAtom = new int[numNodeNameReps]; rep.nodeNameNamespaceAtom = new int[numNodeNameReps]; } int xmlSpaceNodeNameRepID = Integer.MAX_VALUE; int xmlLangNodeNameRepID = Integer.MAX_VALUE; int xmlBaseNodeNameRepID = Integer.MAX_VALUE; int xsiTypeNodeNameRepID = Integer.MAX_VALUE; for (int j = 0; j < numNodeNameReps; j++) { rep.nodeNameNameAtom[j] = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format(" nodeNameNameAtom[%d] %d", j, rep.nodeNameNameAtom[j])); assert (rep.nodeNameNameAtom[j] < rep.atomLimit); rep.nodeNameNamespaceAtom[j] = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format(" nodeNameNamespaceAtom[%d] %d", j, rep.nodeNameNamespaceAtom[j])); assert (rep.nodeNameNamespaceAtom[j] < rep.atomLimit); if (rep.atomEquals(rep.nodeNameNamespaceAtom[j], xmlURIBytes)) { if (rep.atomEquals(rep.nodeNameNameAtom[j], spaceBytes)) xmlSpaceNodeNameRepID = j; else if (rep.atomEquals(rep.nodeNameNameAtom[j], langBytes)) { xmlLangNodeNameRepID = j; } else if (rep.atomEquals(rep.nodeNameNameAtom[j], baseBytes)) xmlBaseNodeNameRepID = j; } else if (rep.atomEquals(rep.nodeNameNameAtom[j], xsiURIBytes)) { if (rep.atomEquals(rep.nodeNameNameAtom[j], typeBytes)) xsiTypeNodeNameRepID = j; } } if (LOG.isTraceEnabled()) { LOG.trace(String.format("xmlSpaceNodeNameRepID %d", xmlSpaceNodeNameRepID)); LOG.trace(String.format("xmlLangNodeNameRepID %d", xmlLangNodeNameRepID)); LOG.trace(String.format("xmlBaseNodeNameRepID %d", xmlBaseNodeNameRepID)); LOG.trace(String.format("xsiTypeNodeNameRepID %d", xsiTypeNodeNameRepID)); } int numElemNodeReps = 0; int numAttrNodeReps = 0; int numDocNodeReps = 0; int numPINodeReps = 0; int numArrayNodeReps = 0; int numDoubles = 0; // node counts rep.numNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numNodeReps %d", rep.numNodeReps)); if (rep.numNodeReps == 0) { // escape int version = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("version %d", version)); assert (version == 0); rep.numNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("rep.numNodeReps %d", rep.numNodeReps)); if (rep.numNodeReps > 0) { rep.nodes = new NodeImpl[rep.numNodeReps]; rep.nodeOrdinal = new long[rep.numNodeReps]; rep.nodeKind = new byte[rep.numNodeReps]; rep.nodeRepID = new int[rep.numNodeReps]; rep.nodeParentNodeRepID = new int[rep.numNodeReps]; } numArrayNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numArrayNodeReps %d", numArrayNodeReps)); if (numArrayNodeReps > 0) { rep.arrayNodeTextRepID = new int[numArrayNodeReps]; rep.arrayNodeChildNodeRepID = new int[numArrayNodeReps]; rep.arrayNodeNumChildren = new int[numArrayNodeReps]; } numDoubles = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numDoubles %d", numDoubles)); if (numDoubles > 0) { rep.doubles = new double[numDoubles]; } numDocNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numDocNodeReps %d", numDocNodeReps)); if (numDocNodeReps > 0) { rep.docNodeTextRepID = new int[numDocNodeReps]; rep.docNodeChildNodeRepID = new int[numDocNodeReps]; rep.docNodeNumChildren = new int[numDocNodeReps]; } } else { // compat if (rep.numNodeReps > 0) { rep.nodes = new NodeImpl[rep.numNodeReps]; rep.nodeOrdinal = new long[rep.numNodeReps]; rep.nodeKind = new byte[rep.numNodeReps]; rep.nodeRepID = new int[rep.numNodeReps]; rep.nodeParentNodeRepID = new int[rep.numNodeReps]; } numElemNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numElemNodeReps %d", numElemNodeReps)); if (numElemNodeReps > 0) { rep.elemNodeNodeNameRepID = new int[numElemNodeReps]; rep.elemNodeAttrNodeRepID = new int[numElemNodeReps]; rep.elemNodeChildNodeRepID = new int[numElemNodeReps]; rep.elemNodeElemDeclRepID = new int[numElemNodeReps]; rep.elemNodeNumAttributes = new int[numElemNodeReps]; rep.elemNodeNumDefaultAttrs = new int[numElemNodeReps]; rep.elemNodeNumChildren = new int[numElemNodeReps]; rep.elemNodeFlags = new int[numElemNodeReps]; } numAttrNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numAttrNodeReps %d", numAttrNodeReps)); if (numAttrNodeReps > 0) { rep.attrNodeNodeNameRepID = new int[numAttrNodeReps]; rep.attrNodeTextRepID = new int[numAttrNodeReps]; rep.attrNodeAttrDeclRepID = new int[numAttrNodeReps]; } rep.numLinkNodeReps = decoder.decodeUnsigned() * 4 / 3; if (LOG.isTraceEnabled()) LOG.trace(String.format("numLinkNodeReps %d", rep.numLinkNodeReps)); if (rep.numLinkNodeReps > 0) { rep.linkNodeKey = new long[rep.numLinkNodeReps]; rep.linkNodeNodeCount = new long[rep.numLinkNodeReps]; rep.linkNodeNodeNameRepID = new int[rep.numLinkNodeReps]; rep.linkNodeNodeRepID = new int[rep.numLinkNodeReps]; } numDocNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numDocNodeReps %d", numDocNodeReps)); if (numDocNodeReps > 0) { rep.docNodeTextRepID = new int[numDocNodeReps]; rep.docNodeChildNodeRepID = new int[numDocNodeReps]; rep.docNodeNumChildren = new int[numDocNodeReps]; } numPINodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numPINodeReps %d", numPINodeReps)); if (numPINodeReps > 0) { rep.piNodeTargetAtom = new int[numPINodeReps]; rep.piNodeTextRepID = new int[numPINodeReps]; } rep.numNSNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numNSNodeReps %d", rep.numNSNodeReps)); if (rep.numNSNodeReps > 0) { rep.nsNodeOrdinal = new long[rep.numNSNodeReps]; rep.nsNodePrevNSNodeRepID = new int[rep.numNSNodeReps]; rep.nsNodePrefixAtom = new int[rep.numNSNodeReps]; rep.nsNodeUriAtom = new int[rep.numNSNodeReps]; } } rep.numPermNodeReps = decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format("numPermNodeReps %d", rep.numPermNodeReps)); if (rep.numPermNodeReps > 0) { rep.permNodeOrdinal = new long[rep.numPermNodeReps]; rep.permNodePrevPermNodeRepID = new int[rep.numPermNodeReps]; rep.permNodeCapability = new Capability[rep.numPermNodeReps]; rep.permNodeRoleId = new long[rep.numPermNodeReps]; } // uri atoms rep.uriTextRepID = 0; decodeText(rep, decoder, rep.atomLimit); // collection atoms rep.colsTextRepID = rep.numTextReps; decodeText(rep, decoder, rep.atomLimit); // nodes int nextDocNodeRep = 0; int nextElemNodeRep = 0; int nextAttrNodeRep = 0; int nextPINodeRep = 0; int nextNSNodeRep = 0; int nextPermNodeRep = 0; int parentNodeRepID = 0; int nextArrayNodeRep = 0; int nextDouble = 0; long lastNSNodeRepOrdinal = 0; long lastPermNodeRepOrdinal = 0; for (int i = 0; i < rep.numNodeReps; i++) { rep.nodeKind[i] = (byte) decoder.decodeUnsigned(4); if (LOG.isTraceEnabled()) LOG.trace(String.format(" nodeKind[%d] %s", i, rep.nodeKind[i])); //assert (rep.nodeKind[i] != NodeKind.NULL); parentNodeRepID += decoder.decodeUnsigned(); if (LOG.isTraceEnabled()) LOG.trace(String.format(" parentNodeRepID[%d] %d", i, parentNodeRepID)); assert (parentNodeRepID <= i); if (parentNodeRepID == i) rep.nodeParentNodeRepID[i] = Integer.MAX_VALUE; else { rep.nodeParentNodeRepID[i] = parentNodeRepID; assert (rep.nodeKind[parentNodeRepID] == NodeKind.ELEM || rep.nodeKind[parentNodeRepID] == NodeKind.DOC || rep.nodeKind[parentNodeRepID] == NodeKind.ARRAY || rep.nodeKind[parentNodeRepID] == NodeKind.OBJECT || rep.nodeKind[parentNodeRepID] == NodeKind.LINK); int parentRepID = rep.nodeRepID[parentNodeRepID]; switch (rep.nodeKind[parentNodeRepID]) { case NodeKind.ELEM: { switch (rep.nodeKind[i]) { case NodeKind.ATTR: if (rep.elemNodeAttrNodeRepID[parentRepID] == Integer.MAX_VALUE) rep.elemNodeAttrNodeRepID[parentRepID] = i; assert (rep.elemNodeAttrNodeRepID[parentRepID] + rep.elemNodeNumAttributes[parentRepID] == i); ++rep.elemNodeNumAttributes[parentRepID]; break; default: if (rep.elemNodeChildNodeRepID[parentRepID] == Integer.MAX_VALUE) rep.elemNodeChildNodeRepID[parentRepID] = i; assert (rep.elemNodeChildNodeRepID[parentRepID] + rep.elemNodeNumChildren[parentRepID] == i); ++rep.elemNodeNumChildren[parentRepID]; } break; } case NodeKind.DOC: { if (rep.docNodeChildNodeRepID[parentNodeRepID] == Integer.MAX_VALUE) rep.docNodeChildNodeRepID[parentNodeRepID] = i; assert (rep.docNodeChildNodeRepID[parentNodeRepID] + rep.docNodeNumChildren[parentNodeRepID] == i); ++rep.docNodeNumChildren[parentNodeRepID]; break; } case NodeKind.ARRAY: case NodeKind.OBJECT: { if (rep.arrayNodeChildNodeRepID[parentRepID] == Integer.MAX_VALUE) rep.arrayNodeChildNodeRepID[parentRepID] = i; assert (rep.arrayNodeChildNodeRepID[parentRepID] + rep.arrayNodeNumChildren[parentRepID] == i); ++rep.arrayNodeNumChildren[parentRepID]; break; } default: break; } } switch (rep.nodeKind[i]) { case NodeKind.ELEM: { int j = nextElemNodeRep++; rep.nodeRepID[i] = j; assert (j < numElemNodeReps); rep.elemNodeNodeNameRepID[j] = decoder.decodeUnsigned(); rep.elemNodeAttrNodeRepID[j] = Integer.MAX_VALUE; rep.elemNodeChildNodeRepID[j] = Integer.MAX_VALUE; rep.elemNodeElemDeclRepID[j] = Integer.MAX_VALUE; rep.elemNodeNumAttributes[j] = 0; rep.elemNodeNumDefaultAttrs[j] = 0; rep.elemNodeNumChildren[j] = 0; rep.elemNodeFlags[j] = 0; if (rep.elemNodeNodeNameRepID[j] >= numNodeNameReps) { rep.elemNodeNumDefaultAttrs[j] = rep.elemNodeNodeNameRepID[j] / numNodeNameReps; rep.elemNodeNodeNameRepID[j] = rep.elemNodeNodeNameRepID[j] % numNodeNameReps; } break; } case NodeKind.ATTR: { assert (parentNodeRepID < i); assert (rep.nodeKind[parentNodeRepID] == NodeKind.ELEM); rep.nodeRepID[i] = nextAttrNodeRep++; assert (rep.nodeRepID[i] < numAttrNodeReps); rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] = decoder.decodeUnsigned(); assert (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] < numNodeNameReps); if (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] == xmlSpaceNodeNameRepID) rep.elemNodeFlags[rep.nodeRepID[parentNodeRepID]] |= xmlSpaceAttrPresentFlag; else if (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] == xmlLangNodeNameRepID) rep.elemNodeFlags[rep.nodeRepID[parentNodeRepID]] |= xmlLangAttrPresentFlag; else if (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] == xmlBaseNodeNameRepID) rep.elemNodeFlags[rep.nodeRepID[parentNodeRepID]] |= xmlBaseAttrPresentFlag; else if (rep.attrNodeNodeNameRepID[rep.nodeRepID[i]] == xsiTypeNodeNameRepID) rep.elemNodeFlags[rep.nodeRepID[parentNodeRepID]] |= xsiTypeAttrPresentFlag; rep.attrNodeTextRepID[rep.nodeRepID[i]] = rep.numTextReps; decodeText(rep, decoder, rep.atomLimit); rep.attrNodeAttrDeclRepID[rep.nodeRepID[i]] = Integer.MAX_VALUE; break; } case NodeKind.TEXT: { rep.nodeRepID[i] = rep.numTextReps; decodeText(rep, decoder, rep.atomLimit); break; } case NodeKind.BINARY: { rep.nodeRepID[i] = 0; int nbytes = decoder.decodeUnsigned(); if (nbytes > MAX_BINARY_BYTES) { // large binary rep.binaryKey = decoder.decode64bits(); rep.binaryOffset = decoder.decodeUnsignedLong(); rep.binarySize = decoder.decodeUnsignedLong(); rep.binaryOrigLen = decoder.decodeUnsignedLong(); rep.binaryPathAtom = decoder.decodeUnsigned(); } else { decodeBinary(decoder, rep, nbytes); } break; } case NodeKind.PI: { int piNodeRep = rep.nodeRepID[i] = nextPINodeRep++; assert (piNodeRep < numPINodeReps); int targetAtom = rep.piNodeTargetAtom[piNodeRep] = decoder.decodeUnsigned(); assert (targetAtom < rep.atomLimit); rep.piNodeTextRepID[piNodeRep] = rep.numTextReps; decodeText(rep, decoder, rep.atomLimit); break; } case NodeKind.LINK: { long key = decoder.decode64bits(); int linkNodeRep = (int) (key % rep.numLinkNodeReps); while (true) { if (rep.linkNodeKey[linkNodeRep] == 0) { rep.nodeRepID[i] = linkNodeRep; rep.linkNodeKey[linkNodeRep] = key; rep.linkNodeNodeCount[linkNodeRep] = decoder.decodeUnsignedLong(); rep.linkNodeNodeNameRepID[linkNodeRep] = decoder.decodeUnsigned(); assert (rep.linkNodeNodeNameRepID[linkNodeRep] < numNodeNameReps); rep.linkNodeNodeRepID[linkNodeRep] = i; break; } linkNodeRep = hashWrap(linkNodeRep + 1, rep.numLinkNodeReps); } break; } case NodeKind.COMMENT: { rep.nodeRepID[i] = rep.numTextReps; decodeText(rep, decoder, rep.atomLimit); break; } case NodeKind.DOC: { int docNode = rep.nodeRepID[i] = nextDocNodeRep++; assert (docNode < numDocNodeReps); rep.docNodeTextRepID[i] = rep.numTextReps; decodeText(rep, decoder, rep.atomLimit); rep.docNodeChildNodeRepID[docNode] = Integer.MAX_VALUE; rep.docNodeNumChildren[docNode] = 0; break; } case NodeKind.NS: { int nsNode = rep.nodeRepID[i] = nextNSNodeRep++; assert (nsNode < rep.numNSNodeReps); lastNSNodeRepOrdinal = rep.nsNodeOrdinal[nsNode] = lastNSNodeRepOrdinal + decoder.decodeUnsignedLong(); rep.nsNodePrevNSNodeRepID[nsNode] = rep.nodeRepID[i] - decoder.decodeUnsigned() - 1; assert (rep.nsNodePrevNSNodeRepID[nsNode] < rep.numNSNodeReps || rep.nsNodePrevNSNodeRepID[nsNode] == Integer.MAX_VALUE); rep.nsNodePrefixAtom[nsNode] = decoder.decodeUnsigned() - 1; assert (rep.nsNodePrefixAtom[nsNode] < rep.atomLimit || rep.nsNodePrefixAtom[nsNode] == Integer.MAX_VALUE); rep.nsNodeUriAtom[nsNode] = decoder.decodeUnsigned() - 1; assert (rep.nsNodeUriAtom[nsNode] < rep.atomLimit || rep.nsNodeUriAtom[nsNode] == Integer.MAX_VALUE); break; } case NodeKind.PERM: { int permNode = rep.nodeRepID[i] = nextPermNodeRep++; assert (permNode < rep.numPermNodeReps); lastPermNodeRepOrdinal = rep.permNodeOrdinal[permNode] = lastPermNodeRepOrdinal + decoder.decodeUnsignedLong(); long prevPermNode = rep.permNodePrevPermNodeRepID[permNode] = permNode - decoder.decodeUnsigned() - 1; assert (prevPermNode < rep.numPermNodeReps || prevPermNode == Integer.MAX_VALUE); Capability capability = rep.permNodeCapability[permNode] = Capability.values()[decoder .decodeUnsigned(4)]; assert (capability != Capability.NULL); long roleId = rep.permNodeRoleId[permNode] = decoder.decode64bits(); assert (roleId < Long.MAX_VALUE); break; } case NodeKind.NULL: { switch (decoder.decodeUnsigned(3)) { case 1: { rep.nodeKind[i] = NodeKind.BOOLEAN; rep.nodeRepID[i] = 0; break; } case 2: { rep.nodeKind[i] = NodeKind.BOOLEAN; rep.nodeRepID[i] = 1; break; } case 3: { rep.nodeKind[i] = NodeKind.NUMBER; rep.nodeRepID[i] = nextDouble++; assert (rep.nodeRepID[i] < numDoubles); rep.doubles[rep.nodeRepID[i]] = decoder.decodeDouble(); break; } case 4: { rep.nodeKind[i] = NodeKind.ARRAY; rep.nodeRepID[i] = nextArrayNodeRep++; assert (rep.nodeRepID[i] < numArrayNodeReps); rep.arrayNodeTextRepID[rep.nodeRepID[i]] = Integer.MAX_VALUE; rep.arrayNodeChildNodeRepID[rep.nodeRepID[i]] = Integer.MAX_VALUE; rep.arrayNodeNumChildren[rep.nodeRepID[i]] = 0; break; } case 5: { rep.nodeKind[i] = NodeKind.OBJECT; rep.nodeRepID[i] = nextArrayNodeRep++; assert (rep.nodeRepID[i] < numArrayNodeReps); rep.arrayNodeTextRepID[rep.nodeRepID[i]] = rep.numTextReps; rep.arrayNodeChildNodeRepID[rep.nodeRepID[i]] = Integer.MAX_VALUE; rep.arrayNodeNumChildren[rep.nodeRepID[i]] = 0; int numKeys = decoder.decodeUnsigned(); addText(rep, numKeys); int atomLimit = rep.atomLimit; for (int j = 0; j < numKeys; ++j) { int atom = decoder.decodeUnsigned(); assert (atom < atomLimit); if (atom >= atomLimit) { bad = "atom"; if (LOG.isTraceEnabled()) LOG.trace(String.format("bad atom %d atomLimit %d", atom, atomLimit)); } rep.textReps[rep.numTextReps++] = atom; } break; } default: break; } break; } default: break; } } if (rep.numNodeReps > 0) { assignOrdinals(rep); } return rep; } private void decodeBinary(Decoder decoder, ExpandedTree rep, int nbytes) throws IOException { int nwords = ((nbytes + 3) / 4); if (nwords <= 0) { LOG.error("nbytes=" + nbytes + ", nwords=" + nwords); } rep.binaryData = new int[nwords]; decoder.decode(rep.binaryData, 1, nwords); } private void assignOrdinals(ExpandedTree rep) { long ordinal = 0; int nodeID = 0; if (rep.nodeKind[0] == NodeKind.LINK) { rep.ordinal = rep.linkNodeNodeCount[rep.nodeRepID[0]]; rep.nodeOrdinal[0] = 0; nodeID = 1; } while (nodeID != Integer.MAX_VALUE) { rep.nodeOrdinal[nodeID] = ordinal++; switch (rep.nodeKind[nodeID]) { case NodeKind.ELEM: { int elemID = rep.nodeRepID[nodeID]; for (int i = 0; i < rep.elemNodeNumAttributes[elemID]; i++) { int attrNodeID = rep.elemNodeAttrNodeRepID[elemID] + i; rep.nodeOrdinal[attrNodeID] = ordinal++; } int childNodeID = rep.elemNodeChildNodeRepID[elemID]; if (childNodeID != Integer.MAX_VALUE) { nodeID = childNodeID; continue; } break; } case NodeKind.LINK: { int linkID = rep.nodeRepID[nodeID]; ordinal += rep.linkNodeNodeCount[linkID] - 1; break; } case NodeKind.DOC: { int docID = rep.nodeRepID[nodeID]; int childNodeID = rep.docNodeChildNodeRepID[docID]; if (childNodeID != Integer.MAX_VALUE) { nodeID = childNodeID; continue; } break; } case NodeKind.ARRAY: case NodeKind.OBJECT: { int docID = rep.nodeRepID[nodeID]; int childNodeID = rep.arrayNodeChildNodeRepID[docID]; if (childNodeID != Integer.MAX_VALUE) { nodeID = childNodeID; continue; } break; } default: break; } int parentNodeID = rep.nodeParentNodeRepID[nodeID]; for (;;) { if (parentNodeID == Integer.MAX_VALUE) { nodeID = Integer.MAX_VALUE; break; } if (rep.nodeKind[parentNodeID] == NodeKind.ELEM) { int elemID = rep.nodeRepID[parentNodeID]; if (++nodeID < rep.elemNodeChildNodeRepID[elemID] + rep.elemNodeNumChildren[elemID]) break; } else if (rep.nodeKind[parentNodeID] == NodeKind.DOC) { int docID = rep.nodeRepID[parentNodeID]; if (++nodeID < rep.docNodeChildNodeRepID[docID] + rep.docNodeNumChildren[docID]) break; } else if (rep.nodeKind[parentNodeID] == NodeKind.ARRAY || rep.nodeKind[parentNodeID] == NodeKind.OBJECT) { int docID = rep.nodeRepID[parentNodeID]; if (++nodeID < rep.arrayNodeChildNodeRepID[docID] + rep.arrayNodeNumChildren[docID]) break; } nodeID = parentNodeID; parentNodeID = rep.nodeParentNodeRepID[nodeID]; } } for (int j = rep.numNodeReps - rep.numNSNodeReps - rep.numPermNodeReps; j < rep.numNodeReps; ++j) rep.nodeOrdinal[j] = ordinal++; for (int k = rep.numNodeReps - rep.numPermNodeReps; k < rep.numNodeReps; ++k) rep.nodeOrdinal[k] = ordinal++; // TODO: compared performance if (Boolean.getBoolean("xcc.decode.atoms")) { for (int x = 0; x < rep.atomLimit; ++x) rep.atomString(x); } } public static int hashWrap(int x, int y) { return (x < y) ? x : x - y; } }