Java tutorial
/* jMimeMagic(TM) is a Java library for determining the content type of files or streams. Copyright (C) 2004 David Castro This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA For more information, please email arimus@users.sourceforge.net */ package ar.com.qbe.siniestros.model.utils.MimeMagic; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; /** * DOCUMENT ME! * * @author $Author$ * @version $Revision$ */ public class MagicParser extends DefaultHandler implements ContentHandler, ErrorHandler { private static String magicFile = "/magic.xml"; private static Log log = LogFactory.getLog(MagicParser.class); // Namespaces feature id (http://xml.org/sax/features/namespaces). protected static final String NAMESPACES_FEATURE_ID = "http://xml.org/sax/features/namespaces"; // Validation feature id (http://xml.org/sax/features/validation). protected static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation"; // Schema validation feature id (http://apache.org/xml/features/validation/schema). protected static final String SCHEMA_VALIDATION_FEATURE_ID = "http://apache.org/xml/features/validation/schema"; // Schema full checking feature id (http://apache.org/xml/features/validation/schema-full-checking). protected static final String SCHEMA_FULL_CHECKING_FEATURE_ID = "http://apache.org/xml/features/validation/schema-full-checking"; // Default parser name. protected static final String DEFAULT_PARSER_NAME = "org.apache.xerces.parsers.SAXParser"; // Default namespaces support (true). protected static final boolean DEFAULT_NAMESPACES = true; // Default validation support (false). protected static final boolean DEFAULT_VALIDATION = false; // Default Schema validation support (false). protected static final boolean DEFAULT_SCHEMA_VALIDATION = false; // Default Schema full checking support (false). protected static final boolean DEFAULT_SCHEMA_FULL_CHECKING = false; private boolean initialized = false; private XMLReader parser = null; private ArrayList stack = new ArrayList(); private Collection matchers = new ArrayList(); private MagicMatcher matcher = null; private MagicMatch match = null; private HashMap properties = null; private String finalValue = ""; private boolean isMimeType = false; private boolean isExtension = false; private boolean isDescription = false; private boolean isTest = false; /** * constructor */ public MagicParser() { log.debug("instantiated"); } /** * parse the xml file and create our MagicMatcher object list * * @throws MagicParseException DOCUMENT ME! */ public synchronized void initialize() throws MagicParseException { boolean namespaces = DEFAULT_NAMESPACES; boolean validation = DEFAULT_VALIDATION; boolean schemaValidation = DEFAULT_SCHEMA_VALIDATION; boolean schemaFullChecking = DEFAULT_SCHEMA_FULL_CHECKING; if (!initialized) { // use default parser try { parser = XMLReaderFactory.createXMLReader(); } catch (Exception e) { try { log.debug("falling back to default parser: " + DEFAULT_PARSER_NAME); parser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER_NAME); } catch (Exception ee) { throw new MagicParseException("unable to instantiate parser"); } } // set parser features try { parser.setFeature(NAMESPACES_FEATURE_ID, namespaces); } catch (SAXException e) { log.debug("initialize(): warning: Parser does not support feature (" + NAMESPACES_FEATURE_ID + ")"); } try { parser.setFeature(VALIDATION_FEATURE_ID, validation); } catch (SAXException e) { log.debug("initialize(): warning: Parser does not support feature (" + VALIDATION_FEATURE_ID + ")"); } try { parser.setFeature(SCHEMA_VALIDATION_FEATURE_ID, schemaValidation); } catch (SAXNotRecognizedException e) { // ignore } catch (SAXNotSupportedException e) { log.debug("initialize(): warning: Parser does not support feature (" + SCHEMA_VALIDATION_FEATURE_ID + ")"); } try { parser.setFeature(SCHEMA_FULL_CHECKING_FEATURE_ID, schemaFullChecking); } catch (SAXNotRecognizedException e) { // ignore } catch (SAXNotSupportedException e) { log.debug("initialize(): warning: Parser does not support feature (" + SCHEMA_FULL_CHECKING_FEATURE_ID + ")"); } // set handlers parser.setErrorHandler(this); parser.setContentHandler(this); // parse file try { // get the magic file URL String magicURL = MagicParser.class.getResource(magicFile).toString(); if (magicURL == null) { log.error("initialize(): couldn't load '" + magicURL + "'"); throw new MagicParseException("couldn't load '" + magicURL + "'"); } parser.parse(magicURL); } catch (SAXParseException e) { // ignore } catch (Exception e) { e.printStackTrace(); throw new MagicParseException("parse error occurred - " + e.getMessage()); } initialized = true; } } /** * DOCUMENT ME! * * @return DOCUMENT ME! */ public Collection getMatchers() { return matchers; } /** * DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void startDocument() throws SAXException { log.debug("startDocument()"); } /** * DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void endDocument() throws SAXException { log.debug("endDocument()"); } /** * DOCUMENT ME! * * @param target DOCUMENT ME! * @param data DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void processingInstruction(String target, String data) throws SAXException { // do nothing } /** * DOCUMENT ME! * * @param ch DOCUMENT ME! * @param offset DOCUMENT ME! * @param length DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void characters(char[] ch, int offset, int length) throws SAXException { String value = new String(ch, offset, length); log.debug("characters(): value is '" + value + "'"); finalValue += value; } /** * DOCUMENT ME! * * @param ch DOCUMENT ME! * @param offset DOCUMENT ME! * @param length DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void ignorableWhitespace(char[] ch, int offset, int length) throws SAXException { // do nothing } /** * DOCUMENT ME! * * @param uri DOCUMENT ME! * @param localName DOCUMENT ME! * @param qname DOCUMENT ME! * @param attributes DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void startElement(String uri, String localName, String qname, Attributes attributes) throws SAXException { log.debug("startElement()"); log.debug("startElement(): localName is '" + localName + "'"); // create a new matcher if (localName.equals("match")) { log.debug("startElement(): creating new matcher"); // match to hold data match = new MagicMatch(); // our matcher matcher = new MagicMatcher(); matcher.setMatch(match); } // these are subelements of matcher, but also occur elsewhere if (matcher != null) { if (localName.equals("mimetype")) { isMimeType = true; } else if (localName.equals("extension")) { isExtension = true; } else if (localName.equals("description")) { isDescription = true; } else if (localName.equals("test")) { isTest = true; int length = attributes.getLength(); for (int i = 0; i < length; i++) { String attrLocalName = attributes.getLocalName(i); String attrValue = attributes.getValue(i); if (attrLocalName.equals("offset")) { if (!attrValue.equals("")) { match.setOffset(new Integer(attrValue).intValue()); log.debug("startElement(): setting offset to '" + attrValue + "'"); } } else if (attrLocalName.equals("length")) { if (!attrValue.equals("")) { match.setLength(new Integer(attrValue).intValue()); log.debug("startElement(): setting length to '" + attrValue + "'"); } } else if (attrLocalName.equals("type")) { match.setType(attrValue); log.debug("startElement(): setting type to '" + attrValue + "'"); } else if (attrLocalName.equals("bitmask")) { if (!attrValue.equals("")) { match.setBitmask(attrValue); log.debug("startElement(): setting bitmask to '" + attrValue + "'"); } } else if (attrLocalName.equals("comparator")) { match.setComparator(attrValue); log.debug("startElement(): setting comparator to '" + attrValue + "'"); } } } else if (localName.equals("property")) { int length = attributes.getLength(); String name = null; String value = null; for (int i = 0; i < length; i++) { String attrLocalName = attributes.getLocalName(i); String attrValue = attributes.getValue(i); if (attrLocalName.equals("name")) { if (!attrValue.equals("")) { name = attrValue; } } else if (attrLocalName.equals("value")) { if (!attrValue.equals("")) { value = attrValue; } } } // save the property to our map if ((name != null) && (value != null)) { if (properties == null) { properties = new HashMap(); } if (!properties.containsKey(name)) { properties.put(name, value); log.debug("startElement(): setting property '" + name + "'='" + value + "'"); } else { log.debug("startElement(): not setting property '" + name + "', duplicate key"); } } } else if (localName.equals("match-list")) { log.debug("startElement(): found submatcher list"); // this means we are processing a child match, so we need to push // the existing match on the stack log.debug("startElement(): pushing current matcher to stack"); stack.add(matcher); } else { // we don't care about this type } } } /** * DOCUMENT ME! * * @param uri DOCUMENT ME! * @param localName DOCUMENT ME! * @param qname DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void endElement(String uri, String localName, String qname) throws SAXException { log.debug("endElement()"); log.debug("endElement(): localName is '" + localName + "'"); // determine which tag these chars are for and save them if (isMimeType) { isMimeType = false; match.setMimeType(finalValue); log.debug("characters(): setting mimetype to '" + finalValue + "'"); } else if (isExtension) { isExtension = false; match.setExtension(finalValue); log.debug("characters(): setting extension to '" + finalValue + "'"); } else if (isDescription) { isDescription = false; match.setDescription(finalValue); log.debug("characters(): setting description to '" + finalValue + "'"); } else if (isTest) { isTest = false; match.setTest(convertOctals(finalValue)); log.debug("characters(): setting test to '" + convertOctals(finalValue) + "'"); } else { // do nothing } finalValue = ""; // need to save the current matcher here if it is filled out enough and // we have an /matcher if (localName.equals("match")) { // FIXME - make sure the MagicMatcher isValid() test works if (matcher.isValid()) { // set the collected properties on this matcher match.setProperties(properties); // add root match if (stack.size() == 0) { log.debug("endElement(): adding root matcher"); matchers.add(matcher); } else { // we need to add the match to it's parent which is on the // stack log.debug("endElement(): adding sub matcher"); MagicMatcher m = (MagicMatcher) stack.get(stack.size() - 1); m.addSubMatcher(matcher); } } else { // don't add invalid matchers log.info("endElement(): not adding invalid matcher '" + match.getDescription() + "'"); } matcher = null; properties = null; // restore matcher from the stack if we have an /matcher-list } else if (localName.equals("match-list")) { if (stack.size() > 0) { log.debug("endElement(): popping from the stack"); matcher = (MagicMatcher) stack.get(stack.size() - 1); // pop from the stack stack.remove(matcher); } } else if (localName.equals("mimetype")) { isMimeType = false; } else if (localName.equals("extension")) { isExtension = false; } else if (localName.equals("description")) { isDescription = false; } else if (localName.equals("test")) { isTest = false; } } /** * DOCUMENT ME! * * @param ex DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void warning(SAXParseException ex) throws SAXException { // FIXME } /** * DOCUMENT ME! * * @param ex DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void error(SAXParseException ex) throws SAXException { // FIXME throw ex; } /** * DOCUMENT ME! * * @param ex DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void fatalError(SAXParseException ex) throws SAXException { // FIXME throw ex; } /** * replaces octal representations of bytes, written as \ddd to actual byte values. * * @param s a string with encoded octals * * @return string with all octals decoded */ private ByteBuffer convertOctals(String s) { int beg = 0; int end = 0; int c1; int c2; int c3; int chr; ByteArrayOutputStream buf = new ByteArrayOutputStream(); while ((end = s.indexOf('\\', beg)) != -1) { if (s.charAt(end + 1) != '\\') { //log.debug("appending chunk '"+s.substring(beg, end)+"'"); for (int z = beg; z < end; z++) { buf.write((int) s.charAt(z)); } //log.debug("found \\ at position "+end); //log.debug("converting octal '"+s.substring(end, end+4)+"'"); if ((end + 4) <= s.length()) { try { chr = Integer.parseInt(s.substring(end + 1, end + 4), 8); //log.debug("converted octal '"+s.substring(end+1,end+4)+"' to '"+chr); //log.debug("converted octal back to '"+Integer.toOctalString(chr)); //log.debug("converted '"+s.substring(end+1,end+4)+"' to "+chr+"/"+((char)chr)); buf.write(chr); beg = end + 4; end = beg; } catch (NumberFormatException nfe) { //log.debug("not an octal"); buf.write((int) '\\'); beg = end + 1; end = beg; } } else { //log.debug("not an octal, not enough chars left in string"); buf.write((int) '\\'); beg = end + 1; end = beg; } } else { //log.debug("appending \\"); buf.write((int) '\\'); beg = end + 1; end = beg; } } if (end < s.length()) { for (int z = beg; z < s.length(); z++) { buf.write((int) s.charAt(z)); } } try { log.debug("convertOctals(): returning buffer size '" + buf.size() + "'"); ByteBuffer b = ByteBuffer.allocate(buf.size()); return b.put(buf.toByteArray()); } catch (Exception e) { log.error("convertOctals(): error parsing string: " + e); return ByteBuffer.allocate(0); } } }