Java examples for XML:XML String
decompress XML String
//package com.java2s; public class Main { public static void main(String[] argv) throws Exception { byte[] xml = new byte[] { 34, 35, 36, 37, 37, 37, 67, 68, 69 }; System.out.println(decompressXML(xml)); }/* w w w.ja va2s. c o m*/ private static int endDocTag = 0x00100101; private static int startTag = 0x00100102; private static int endTag = 0x00100103; private static String spaces = " "; public static String decompressXML(final byte[] xml) { StringBuilder finalXML = new StringBuilder(); // Compressed XML file/bytes starts with 24x bytes of data, // 9 32 bit words in little endian order (LSB first): // 0th word is 03 00 08 00 // 3rd word SEEMS TO BE: Offset at then of StringTable // 4th word is: Number of strings in string table // WARNING: Sometime I indiscriminently display or refer to word in // little endian storage format, or in integer format (ie MSB first). int numbStrings = LEW(xml, 4 * 4); // StringIndexTable starts at offset 24x, an array of 32 bit LE offsets // of the length/string data in the StringTable. int sitOff = 0x24; // Offset of start of StringIndexTable // StringTable, each string is represented with a 16 bit little endian // character count, followed by that number of 16 bit (LE) (Unicode) // chars. int stOff = sitOff + numbStrings * 4; // StringTable follows // StrIndexTable // XMLTags, The XML tag tree starts after some unknown content after the // StringTable. There is some unknown data after the StringTable, scan // forward from this point to the flag for the start of an XML start // tag. int xmlTagOff = LEW(xml, 3 * 4); // Start from the offset in the 3rd // word. // Scan forward until we find the bytes: 0x02011000(x00100102 in normal // int) for (int ii = xmlTagOff; ii < xml.length - 4; ii += 4) { if (LEW(xml, ii) == startTag) { xmlTagOff = ii; break; } } // end of hack, scanning for start of first start tag // XML tags and attributes: // Every XML start and end tag consists of 6 32 bit words: // 0th word: 02011000 for startTag and 03011000 for endTag // 1st word: a flag?, like 38000000 // 2nd word: Line of where this tag appeared in the original source file // 3rd word: FFFFFFFF ?? // 4th word: StringIndex of NameSpace name, or FFFFFFFF for default NS // 5th word: StringIndex of Element Name // (Note: 01011000 in 0th word means end of XML document, endDocTag) // Start tags (not end tags) contain 3 more words: // 6th word: 14001400 meaning?? // 7th word: Number of Attributes that follow this tag(follow word 8th) // 8th word: 00000000 meaning?? // Attributes consist of 5 words: // 0th word: StringIndex of Attribute Name's Namespace, or FFFFFFFF // 1st word: StringIndex of Attribute Name // 2nd word: StringIndex of Attribute Value, or FFFFFFF if ResourceId // used // 3rd word: Flags? // 4th word: str ind of attr value again, or ResourceId of value // TMP, dump string table to tr for debugging // tr.addSelect("strings", null); // for (int ii=0; ii<numbStrings; ii++) { // // Length of string starts at StringTable plus offset in StrIndTable // String str = compXmlString(xml, sitOff, stOff, ii); // tr.add(String.valueOf(ii), str); // } // tr.parent(); // Step through the XML tree element tags and attributes int off = xmlTagOff; int indent = 0; int startTagLineNo = -2; while (off < xml.length) { int tag0 = LEW(xml, off); // int tag1 = LEW(xml, off+1*4); int lineNo = LEW(xml, off + 2 * 4); // int tag3 = LEW(xml, off+3*4); int nameNsSi = LEW(xml, off + 4 * 4); int nameSi = LEW(xml, off + 5 * 4); if (tag0 == startTag) { // XML START TAG int tag6 = LEW(xml, off + 6 * 4); // Expected to be 14001400 int numbAttrs = LEW(xml, off + 7 * 4); // Number of Attributes // to follow // int tag8 = LEW(xml, off+8*4); // Expected to be 00000000 off += 9 * 4; // Skip over 6+3 words of startTag data String name = compXmlString(xml, sitOff, stOff, nameSi); // tr.addSelect(name, null); startTagLineNo = lineNo; // Look for the Attributes StringBuffer sb = new StringBuffer(); for (int ii = 0; ii < numbAttrs; ii++) { int attrNameNsSi = LEW(xml, off); // AttrName Namespace Str // Ind, or FFFFFFFF int attrNameSi = LEW(xml, off + 1 * 4); // AttrName String // Index int attrValueSi = LEW(xml, off + 2 * 4); // AttrValue Str // Ind, or // FFFFFFFF int attrFlags = LEW(xml, off + 3 * 4); int attrResId = LEW(xml, off + 4 * 4); // AttrValue // ResourceId or dup // AttrValue StrInd off += 5 * 4; // Skip over the 5 words of an attribute String attrName = compXmlString(xml, sitOff, stOff, attrNameSi); String attrValue = attrValueSi != -1 ? compXmlString( xml, sitOff, stOff, attrValueSi) : "resourceID 0x" + Integer.toHexString(attrResId); sb.append(" " + attrName + "=\"" + attrValue + "\""); // tr.add(attrName, attrValue); } finalXML.append("<" + name + sb + ">"); prtIndent(indent, "<" + name + sb + ">"); indent++; } else if (tag0 == endTag) { // XML END TAG indent--; off += 6 * 4; // Skip over 6 words of endTag data String name = compXmlString(xml, sitOff, stOff, nameSi); finalXML.append("</" + name + ">"); prtIndent(indent, "</" + name + "> (line " + startTagLineNo + "-" + lineNo + ")"); // tr.parent(); // Step back up the NobTree } else if (tag0 == endDocTag) { // END OF XML DOC TAG break; } else { prt(" Unrecognized tag code '" + Integer.toHexString(tag0) + "' at offset " + off); break; } } // end of while loop scanning tags and attributes of XML tree // prt(" end at offset " + off); return finalXML.toString(); } private static int LEW(final byte[] arr, final int off) { return arr[off + 3] << 24 & 0xff000000 | arr[off + 2] << 16 & 0xff0000 | arr[off + 1] << 8 & 0xff00 | arr[off] & 0xFF; } private static String compXmlString(final byte[] xml, final int sitOff, final int stOff, final int strInd) { if (strInd < 0) { return null; } int strOff = stOff + LEW(xml, sitOff + strInd * 4); return compXmlStringAt(xml, strOff); } private static void prtIndent(final int indent, final String str) { prt(spaces.substring(0, Math.min(indent * 2, spaces.length())) + str); } private static void prt(final String str) { // System.err.print(str); } private static String compXmlStringAt(final byte[] arr, final int strOff) { int strLen = arr[strOff + 1] << 8 & 0xff00 | arr[strOff] & 0xff; byte[] chars = new byte[strLen]; for (int ii = 0; ii < strLen; ii++) { chars[ii] = arr[strOff + 2 + ii * 2]; } return new String(chars); // Hack, just use 8 byte chars } }