ar.com.qbe.siniestros.model.utils.MimeMagic.MagicMatcher.java Source code

Java tutorial

Introduction

Here is the source code for ar.com.qbe.siniestros.model.utils.MimeMagic.MagicMatcher.java

Source

/*
jMimeMagic(TM) is a Java library for determining the content type of files or
streams.
    
Copyright (C) 2004 David Castro
    
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
    
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.
    
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
    
For more information, please email arimus@users.sourceforge.net
*/
package ar.com.qbe.siniestros.model.utils.MimeMagic;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.oro.text.perl.Perl5Util;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;

/**
 * This class represents a single match test
 *
 * @author $Author: arimus $
 * @version $Revision: 1.1 $
 */
public class MagicMatcher implements Cloneable {
    private static Log log = LogFactory.getLog(MagicMatcher.class);
    private ArrayList subMatchers = new ArrayList(0);
    private MagicMatch match = null;

    /**
     * Creates a matcher that has no match definition and no sub-matchers yet.
     * The only work done here is emitting a debug log entry.
     */
    public MagicMatcher() {
        log.debug("instantiated");
    }

    /**
     * Sets the match definition this matcher evaluates.
     *
     * @param match the match definition to use; stored as-is, without copying
     *        or validation
     */
    public void setMatch(MagicMatch match) {
        log.debug("setMatch()");
        this.match = match;
    }

    /**
     * Returns the match definition currently held by this matcher.
     *
     * @return the stored match definition (not a copy), or <code>null</code>
     *         if none has been set
     */
    public MagicMatch getMatch() {
        log.debug("getMatch()");

        return match;
    }

    /**
     * Checks whether this matcher carries enough information to be evaluated.
     * <p>
     * The matcher is valid when a match definition is present and it has a
     * non-empty type, a non-empty test value, a non-empty description and one
     * of the supported comparators ('=', '!', '&gt;' or '&lt;').
     *
     * @return <code>true</code> if the match definition is complete,
     *         <code>false</code> otherwise
     */
    public boolean isValid() {
        log.debug("isValid()");

        if ((match == null) || (match.getTest() == null)) {
            return false;
        }

        // The type has to come from the match itself. It used to be derived from
        // the test bytes, which meant a missing type was never detected here and
        // only surfaced later as a NullPointerException while testing.
        String type = match.getType();
        char comparator = match.getComparator();
        String description = match.getDescription();
        String test = new String(match.getTest().array());

        boolean supportedComparator = (comparator == '=') || (comparator == '!') || (comparator == '>')
                || (comparator == '<');

        // new String(...) never returns null, so the test value only needs an
        // emptiness check.
        return (type != null) && !type.equals("") && supportedComparator && (description != null)
                && !description.equals("") && !test.equals("");
    }

    /**
     * Appends a sub-matcher to the end of this matcher's sub-matcher list.
     * The argument is added as-is; no null check or validation is performed.
     *
     * @param m the sub-matcher to append
     */
    public void addSubMatcher(MagicMatcher m) {
        log.debug("addSubMatcher()");
        subMatchers.add(m);
    }

    /**
     * Replaces all sub-matchers of this matcher with the given ones.
     * <p>
     * The elements are copied into this matcher's own list, so later changes to
     * the argument collection do not affect this matcher.
     *
     * @param a the new sub-matchers; may be the collection previously returned by
     *        {@link #getSubMatchers()}
     *
     * @throws NullPointerException if <code>a</code> is <code>null</code>; the
     *         current sub-matchers are left untouched in that case
     */
    public void setSubMatchers(Collection a) {
        // The description is only used for logging, so tolerate a matcher whose
        // match has not been set yet instead of failing on the log statement.
        String description = (match != null) ? match.getDescription() : null;
        log.debug("setSubMatchers(): for match '" + description + "'");

        // Snapshot the argument first: it may be the live list handed out by
        // getSubMatchers(), and clearing that list before copying from it would
        // silently drop every sub-matcher.
        ArrayList replacement = new ArrayList(a);

        subMatchers.clear();
        subMatchers.addAll(replacement);
    }

    /**
     * Returns the sub-matchers registered on this matcher.
     *
     * @return the internal sub-matcher list itself (not a copy), so changes made
     *         through the returned collection are visible to this matcher
     */
    public Collection getSubMatchers() {
        log.debug("getSubMatchers()");

        return this.subMatchers;
    }

    /**
     * Tests a file against this matcher and, unless only the MIME match is
     * requested, against every registered sub-matcher.
     * <p>
     * The bytes needed for the configured test type are read from the file at
     * the match offset and handed to the type-specific comparison. The file is
     * opened read-only and is always closed before this method returns.
     *
     * @param f the file that should be used to test the match
     * @param onlyMimeMatch if <code>true</code>, sub-matchers are not evaluated;
     *        sub-matchers themselves are always invoked with <code>false</code>
     *
     * @return a clone of this matcher's match with every matching sub-match
     *         attached, or <code>null</code> if the file holds too little data,
     *         the test does not match, or the match cannot be cloned
     *
     * @throws IOException if the file cannot be opened, positioned or read
     * @throws UnsupportedTypeException if the match type is missing or unknown
     */
    public MagicMatch test(File f, boolean onlyMimeMatch) throws IOException, UnsupportedTypeException {
        log.debug("test(File)");

        int offset = match.getOffset();
        String description = match.getDescription();
        String type = match.getType();

        log.debug("test(File): testing '" + f.getName() + "' for '" + description + "'");

        log.debug("test(File): \n=== BEGIN MATCH INFO ==");
        log.debug(match.print());
        log.debug("test(File): \n=== END MATCH INFO ====\n");

        RandomAccessFile file = new RandomAccessFile(f, "r");

        try {
            int length = 0;

            // Comparing from the literal side sends a null type to the
            // UnsupportedTypeException branch instead of a NullPointerException.
            if ("byte".equals(type)) {
                length = 1;
            } else if ("short".equals(type) || "leshort".equals(type) || "beshort".equals(type)) {
                // testShort() reads exactly two bytes; asking for more rejected
                // files that end right after the value
                length = 2;
            } else if ("long".equals(type) || "lelong".equals(type) || "belong".equals(type)) {
                // testLong() reads a 32-bit value, i.e. exactly four bytes
                length = 4;
            } else if ("string".equals(type)) {
                length = match.getTest().capacity();
            } else if ("regex".equals(type)) {
                // a match length of 0 means "everything from the offset to the end"
                final int matchLength = match.getLength();
                length = (matchLength == 0) ? (int) file.length() - offset : matchLength;

                if (length < 0) {
                    length = 0;
                }
            } else if ("detector".equals(type)) {
                length = (int) file.length() - offset;

                if (length < 0) {
                    length = 0;
                }
            } else {
                throw new UnsupportedTypeException("unsupported test type '" + type + "'");
            }

            // we know this match won't work since there isn't enough data for the test
            if (length > (file.length() - offset)) {
                return null;
            }

            byte[] buf = new byte[length];
            file.seek(offset);

            // readFully() keeps reading until the buffer is filled and appends each
            // partial read after the previous one. The former hand-written loop
            // always wrote at index 0, so a short read overwrote earlier bytes.
            file.readFully(buf);

            log.debug("test(File): stream size is '" + buf.length + "'");

            if (!testInternal(buf)) {
                return null;
            }

            // the result is a private copy so attaching sub-matches does not alter
            // this matcher's own match definition
            MagicMatch result;

            try {
                result = (MagicMatch) match.clone();
            } catch (CloneNotSupportedException e) {
                // without a copy there is nothing safe to return or to attach
                // sub-matches to; report it instead of failing later with a
                // NullPointerException
                log.error("test(File): unable to clone match '" + description + "'", e);

                return null;
            }

            log.debug("test(File): testing matched '" + description + "'");

            // set the data on this match
            if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) {
                log.debug("test(File): testing " + subMatchers.size() + " submatches for '" + description + "'");

                for (int i = 0; i < subMatchers.size(); i++) {
                    log.debug("test(File): testing submatch " + i);

                    MagicMatcher m = (MagicMatcher) subMatchers.get(i);
                    MagicMatch submatch = m.test(f, false);

                    if (submatch != null) {
                        log.debug("test(File): submatch " + i + " matched with '" + submatch.getDescription()
                                + "'");
                        result.addSubMatch(submatch);
                    } else {
                        log.debug("test(File): submatch " + i + " doesn't match");
                    }
                }
            }

            return result;
        } finally {
            try {
                file.close();
            } catch (IOException ignored) {
                // best effort: the file was only read, so a failed close loses nothing
            }
        }
    }

    /**
     * Tests in-memory data against this matcher and, unless only the MIME match
     * is requested, against every registered sub-matcher.
     * <p>
     * The bytes needed for the configured test type are copied out of
     * <code>data</code> starting at the match offset and handed to the
     * type-specific comparison.
     *
     * @param data the data that should be used to test the match
     * @param onlyMimeMatch if <code>true</code>, sub-matchers are not evaluated;
     *        sub-matchers themselves are always invoked with <code>false</code>
     *
     * @return a clone of this matcher's match with every matching sub-match
     *         attached, or <code>null</code> if the data is too short for the
     *         test, the offset is negative, the test does not match, or the
     *         match cannot be cloned
     *
     * @throws IOException declared for symmetry with the file based variant
     * @throws UnsupportedTypeException if the match type is missing or unknown
     */
    public MagicMatch test(byte[] data, boolean onlyMimeMatch) throws IOException, UnsupportedTypeException {
        log.debug("test(byte[])");

        int offset = match.getOffset();
        String description = match.getDescription();
        String type = match.getType();

        log.debug("test(byte[]): testing byte[] data for '" + description + "'");

        log.debug("test(byte[]): \n=== BEGIN MATCH INFO ==");
        log.debug(match.print());
        log.debug("test(byte[]): \n=== END MATCH INFO ====\n");

        int length = 0;

        // Comparing from the literal side sends a null type to the
        // UnsupportedTypeException branch instead of a NullPointerException.
        if ("byte".equals(type)) {
            length = 1;
        } else if ("short".equals(type) || "leshort".equals(type) || "beshort".equals(type)) {
            // testShort() reads exactly two bytes
            length = 2;
        } else if ("long".equals(type) || "lelong".equals(type) || "belong".equals(type)) {
            // testLong() reads a 32-bit value, i.e. exactly four bytes
            length = 4;
        } else if ("string".equals(type)) {
            length = match.getTest().capacity();
        } else if ("regex".equals(type) || "detector".equals(type)) {
            // Everything from the offset to the end of the data. The former "- 1"
            // only compensated for the strict bounds check further down and cut
            // the last byte off the tested data.
            length = data.length - offset;

            if (length < 0) {
                length = 0;
            }
        } else {
            throw new UnsupportedTypeException("unsupported test type " + type);
        }

        log.debug("test(byte[]): offset=" + offset + ",length=" + length + ",data length=" + data.length);

        // Not enough data for this test. Written as a subtraction so a large
        // offset cannot overflow; data ending exactly at offset + length is
        // accepted, matching the file based variant.
        if ((offset < 0) || (length > (data.length - offset))) {
            return null;
        }

        byte[] buf = new byte[length];
        System.arraycopy(data, offset, buf, 0, length);

        log.debug("test(byte[]): stream size is '" + buf.length + "'");

        if (!testInternal(buf)) {
            return null;
        }

        // the result is a private copy so attaching sub-matches does not alter
        // this matcher's own match definition
        MagicMatch result;

        try {
            result = (MagicMatch) match.clone();
        } catch (CloneNotSupportedException e) {
            // without a copy there is nothing safe to return or to attach
            // sub-matches to; report it instead of failing later with a
            // NullPointerException
            log.error("test(byte[]): unable to clone match '" + description + "'", e);

            return null;
        }

        log.debug("test(byte[]): testing matched '" + description + "'");

        // set the data on this match
        if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) {
            log.debug("test(byte[]): testing " + subMatchers.size() + " submatches for '" + description + "'");

            for (int i = 0; i < subMatchers.size(); i++) {
                log.debug("test(byte[]): testing submatch " + i);

                MagicMatcher m = (MagicMatcher) subMatchers.get(i);
                MagicMatch submatch = m.test(data, false);

                if (submatch != null) {
                    log.debug("test(byte[]): submatch " + i + " matched with '" + submatch.getDescription()
                            + "'");
                    result.addSubMatch(submatch);
                } else {
                    log.debug("test(byte[]): submatch " + i + " doesn't match");
                }
            }
        }

        return result;
    }

    /**
     * Dispatches the extracted bytes to the comparison routine that belongs to
     * the match type.
     * <p>
     * Empty data never matches. A missing type, an empty test value or an
     * unknown type is logged as an error and reported as "no match".
     *
     * @param data the bytes taken from the tested content at the match offset
     * @return <code>true</code> if the type-specific comparison succeeds
     */
    private boolean testInternal(byte[] data) {
        log.debug("testInternal(byte[])");

        if (data.length == 0) {
            return false;
        }

        String type = match.getType();
        String test = new String(match.getTest().array());
        String mimeType = match.getMimeType();
        String description = match.getDescription();

        // new String(...) cannot be null, so only the type needs a null check
        if ((type == null) || (test.length() == 0)) {
            log.error("testInternal(byte[]): type or test is empty for '" + mimeType + " - " + description + "'");

            return false;
        }

        // the regex test works on text and needs no byte buffer
        if (type.equals("regex")) {
            return testRegex(new String(data));
        }

        boolean shortType = type.equals("short") || type.equals("leshort") || type.equals("beshort");
        boolean longType = type.equals("long") || type.equals("lelong") || type.equals("belong");
        boolean stringType = type.equals("string");
        boolean byteType = type.equals("byte");
        boolean detectorType = type.equals("detector");

        // note: the date, ledate and bedate types are not implemented
        if (!(shortType || longType || stringType || byteType || detectorType)) {
            log.error("testInternal(byte[]): invalid test type '" + type + "'");

            return false;
        }

        ByteBuffer buffer = ByteBuffer.allocate(data.length);
        buffer.put(data);

        // a freshly allocated buffer is big-endian, so only the little-endian
        // variants have to switch the byte order
        if (type.equals("leshort") || type.equals("lelong")) {
            buffer.order(ByteOrder.LITTLE_ENDIAN);
        }

        if (stringType) {
            return testString(buffer);
        }

        if (byteType) {
            return testByte(buffer);
        }

        if (shortType) {
            return testShort(buffer);
        }

        if (longType) {
            return testLong(buffer);
        }

        return testDetector(buffer);
    }

    /**
     * Compares the first byte of the data, after applying the match bitmask,
     * with the byte value given by the match test.
     * <p>
     * The test value is parsed with {@link Integer#decode(String)}, so decimal,
     * hexadecimal and octal notations are accepted. Both values are compared as
     * signed bytes. For '&gt;' and '&lt;' the data value is the left-hand operand,
     * the same orientation used by the short and long tests.
     *
     * @param data the data we are testing
     *
     * @return <code>true</code> if the comparison holds; <code>false</code> if it
     *         does not, if the comparator is unknown or if the test value is not
     *         a valid number
     */
    private boolean testByte(ByteBuffer data) {
        log.debug("testByte()");

        String test = new String(match.getTest().array());
        char comparator = match.getComparator();
        long bitmask = match.getBitmask();

        byte raw = data.get(0);
        byte b = (byte) (raw & bitmask);
        log.debug("testByte(): decoding '" + test + "' to byte");

        byte t;

        try {
            t = Integer.decode(test).byteValue();
        } catch (NumberFormatException e) {
            // same policy as testShort(): a malformed test value is a non-match,
            // not a failure of the whole detection run
            log.error("testByte(): " + e);

            return false;
        }

        // the bitmask is applied to the data byte, so that is what gets reported
        log.debug("testByte(): applying bitmask '" + bitmask + "' to '" + raw + "', result is '" + b + "'");
        log.debug("testByte(): comparing byte '" + b + "' to '" + t + "'");

        switch (comparator) {
        case '=':
            return b == t;

        case '!':
            return b != t;

        case '>':
            return b > t;

        case '<':
            return b < t;

        default:
            return false;
        }
    }

    /**
     * Compares the data byte by byte with the test bytes of the match.
     * <p>
     * Only the first <code>n</code> bytes of the data are considered, where
     * <code>n</code> is the number of test bytes. For '&gt;' and '&lt;' the first
     * differing position decides, the data byte is the left-hand operand (the
     * same orientation used by the short and long tests) and bytes are ordered
     * by their unsigned value. Identical content is neither greater nor less.
     *
     * @param data the data we are testing
     *
     * @return <code>true</code> if the comparison holds; <code>false</code> if it
     *         does not or if the comparator is unknown
     */
    private boolean testString(ByteBuffer data) {
        log.debug("testString()");

        ByteBuffer test = match.getTest();
        char comparator = match.getComparator();

        byte[] b = data.array();
        byte[] t = test.array();

        // data shorter than the test can never equal it; answer directly rather
        // than running off the end of the data array
        if (b.length < t.length) {
            return comparator == '!';
        }

        // the per-byte trace builds two strings, so prepare them once and only
        // when debug logging is actually switched on
        boolean debug = log.isDebugEnabled();
        String dataText = debug ? new String(b) : null;
        String testText = debug ? new String(t) : null;

        // index of the first differing byte, or -1 when all test bytes match
        int diffAt = -1;

        for (int i = 0; i < t.length; i++) {
            if (debug) {
                log.debug("testing byte '" + b[i] + "' from '" + dataText + "' against byte '" + t[i]
                        + "' from '" + testText + "'");
            }

            if (t[i] != b[i]) {
                diffAt = i;

                break;
            }
        }

        switch (comparator) {
        case '=':
            return diffAt < 0;

        case '!':
            return diffAt >= 0;

        case '>':
            // guarded by diffAt: the old code indexed one past the end of the
            // arrays whenever the content was identical
            return (diffAt >= 0) && ((b[diffAt] & 0xff) > (t[diffAt] & 0xff));

        case '<':
            return (diffAt >= 0) && ((b[diffAt] & 0xff) < (t[diffAt] & 0xff));

        default:
            return false;
        }
    }

    /**
     * Compares the 16-bit value at the start of the data, after applying the
     * match bitmask, with the numeric test value of the match.
     * <p>
     * The test value is parsed with {@link Integer#decode(String)} and narrowed
     * to a short. A test value that is not a valid number is logged and treated
     * as "no match".
     *
     * @param data the data we are testing
     *
     * @return <code>true</code> if the comparison holds; <code>false</code> if it
     *         does not or if the comparator is unknown
     */
    private boolean testShort(ByteBuffer data) {
        log.debug("testShort()");

        String test = new String(match.getTest().array());
        char comparator = match.getComparator();
        long bitmask = match.getBitmask();

        // read the value and mask it in one step, before any comparison happens
        short val = (short) (byteArrayToShort(data) & (short) bitmask);

        short expected;

        try {
            expected = Integer.decode(test).shortValue();
        } catch (NumberFormatException e) {
            log.error("testShort(): " + e);

            return false;
        }

        log.debug("testShort(): testing '" + Long.toHexString(val) + "' against '" + Long.toHexString(expected)
                + "'");

        if (comparator == '=') {
            return val == expected;
        }

        if (comparator == '!') {
            return val != expected;
        }

        if (comparator == '>') {
            return val > expected;
        }

        if (comparator == '<') {
            return val < expected;
        }

        return false;
    }

    /**
     * Compares the value produced by {@link #byteArrayToLong(ByteBuffer)}, after
     * applying the match bitmask, with the numeric test value of the match.
     * <p>
     * The test value is parsed with {@link Long#decode(String)}. A test value
     * that is not a valid number is logged and treated as "no match", the same
     * way the short test handles it.
     *
     * @param data the data we are testing
     *
     * @return <code>true</code> if the comparison holds; <code>false</code> if it
     *         does not, if the comparator is unknown or if the test value is not
     *         a valid number
     */
    private boolean testLong(ByteBuffer data) {
        log.debug("testLong()");

        String test = new String(match.getTest().array());
        char comparator = match.getComparator();
        long bitmask = match.getBitmask();

        // apply bitmask before the comparison
        long val = byteArrayToLong(data) & bitmask;

        long tst;

        try {
            tst = Long.decode(test).longValue();
        } catch (NumberFormatException e) {
            // a malformed test value must not abort the whole detection run
            log.error("testLong(): " + e);

            return false;
        }

        log.debug("testLong(): testing '" + Long.toHexString(val) + "' against '" + test + "' => '"
                + Long.toHexString(tst) + "'");

        switch (comparator) {
        case '=':
            return val == tst;

        case '!':
            return val != tst;

        case '>':
            return val > tst;

        case '<':
            return val < tst;

        default:
            return false;
        }
    }

    /**
     * Matches the text against the Perl5 expression stored as the match test.
     * <p>
     * With the '=' comparator the result is whether the expression matches,
     * with '!' it is the negation. Any other comparator yields
     * <code>false</code> without evaluating the expression.
     *
     * @param text the data we are testing
     *
     * @return if we have a match
     */
    private boolean testRegex(String text) {
        log.debug("testRegex()");

        String pattern = new String(match.getTest().array());
        char comparator = match.getComparator();

        Perl5Util utility = new Perl5Util();
        log.debug("testRegex(): searching for '" + pattern + "'");

        boolean wantMatch = (comparator == '=');
        boolean wantNoMatch = (comparator == '!');

        // only the two comparators above are meaningful for expressions
        if (!wantMatch && !wantNoMatch) {
            return false;
        }

        boolean found = utility.match(pattern, text);

        return wantMatch ? found : !found;
    }

    /**
     * Runs the detector class named by the match test against the data.
     * <p>
     * The class is loaded by name and instantiated through its no-argument
     * constructor. If the detector reports at least one type, the first one is
     * stored as the MIME type of this matcher's match. Loading or instantiation
     * problems are logged and reported as "no match".
     * <p>
     * NOTE(review): the class name is taken verbatim from the match definition
     * and loaded reflectively - confirm that match definitions only come from a
     * trusted source.
     *
     * @param data the data we are testing
     *
     * @return <code>true</code> if the detector reported at least one type
     */
    private boolean testDetector(ByteBuffer data) {
        log.debug("testDetector()");

        String detectorClass = new String(match.getTest().array());

        try {
            log.debug("loading class: " + detectorClass);

            Class c = Class.forName(detectorClass);
            Object instance = c.newInstance();

            // check before casting so a wrongly configured class is reported like
            // the other loading problems instead of escaping as ClassCastException
            if (!(instance instanceof MagicDetector)) {
                log.error("specified class is not a valid detector class: " + detectorClass);

                return false;
            }

            MagicDetector detector = (MagicDetector) instance;
            String[] types = detector.process(data.array(), match.getOffset(), match.getLength(),
                    match.getBitmask(), match.getComparator(), match.getMimeType(), match.getProperties());

            if ((types != null) && (types.length > 0)) {
                // the match object has no mime type set, so set from the detector class processing
                match.setMimeType(types[0]);

                return true;
            }
        } catch (ClassNotFoundException e) {
            log.error("failed to load detector: " + detectorClass, e);
        } catch (InstantiationException e) {
            log.error("specified class is not a valid detector class: " + detectorClass, e);
        } catch (IllegalAccessException e) {
            log.error("specified class cannot be accessed: " + detectorClass, e);
        }

        return false;
    }

    /**
     * Returns the types handled by the detector class named by the match test.
     * <p>
     * The class is loaded by name and instantiated through its no-argument
     * constructor. Loading or instantiation problems are logged and result in
     * an empty array.
     *
     * @return whatever the detector's <code>getHandledTypes()</code> returns, or
     *         an empty array if the detector could not be created
     */
    public String[] getDetectorExtensions() {
        // this used to log "testDetector()", which pointed at the wrong method
        log.debug("getDetectorExtensions()");

        String detectorClass = new String(match.getTest().array());

        try {
            log.debug("loading class: " + detectorClass);

            Class c = Class.forName(detectorClass);
            Object instance = c.newInstance();

            // check before casting so a wrongly configured class is reported like
            // the other loading problems instead of escaping as ClassCastException
            if (!(instance instanceof MagicDetector)) {
                log.error("specified class is not a valid detector class: " + detectorClass);

                return new String[0];
            }

            return ((MagicDetector) instance).getHandledTypes();
        } catch (ClassNotFoundException e) {
            log.error("failed to load detector: " + detectorClass, e);
        } catch (InstantiationException e) {
            log.error("specified class is not a valid detector class: " + detectorClass, e);
        } catch (IllegalAccessException e) {
            log.error("specified class cannot be accessed: " + detectorClass, e);
        }

        return new String[0];
    }

    /**
     * Encodes a byte as a three digit octal string.
     * <p>
     * The byte is treated as an unsigned value (0 to 255), so the result ranges
     * from "000" to "377".
     *
     * @param b a byte of data
     *
     * @return an octal representation of the byte data
     */
    private String byteToOctalString(byte b) {
        // work on the unsigned value so negative bytes do not distort the digits
        int value = b & 0xff;

        // octal digits cover bits 7-6, 5-3 and 2-0; the previous "b / 32" took
        // bits 7-5 for the leading digit and therefore overlapped the middle one
        int high = (value >> 6) & 3;
        int middle = (value >> 3) & 7;
        int low = value & 7;

        return String.valueOf(high) + String.valueOf(middle) + String.valueOf(low);
    }

    /**
     * Reads a short from the start of the buffer.
     * <p>
     * The two bytes at absolute index 0 are combined according to the buffer's
     * current byte order; the buffer's position is not changed.
     *
     * @param data buffer of byte data, holding at least two bytes
     *
     * @return the short value stored at index 0
     */
    private short byteArrayToShort(ByteBuffer data) {
        return data.getShort(0);
    }

    /**
     * Reads a 32-bit value from the start of the buffer and widens it to a long.
     * <p>
     * The four bytes at absolute index 0 are combined according to the buffer's
     * current byte order. The widening is signed, so a value with the highest
     * bit set comes back negative.
     *
     * @param data buffer of byte data, holding at least four bytes
     *
     * @return the int stored at index 0, sign-extended to a long
     */
    private long byteArrayToLong(ByteBuffer data) {
        final int value = data.getInt(0);

        return value;
    }

    /**
     * Creates a deep copy of this matcher.
     * <p>
     * The copy is a new <code>MagicMatcher</code> holding a clone of this
     * matcher's match and a clone of every sub-matcher, in the same order.
     *
     * @return the copied matcher
     *
     * @throws CloneNotSupportedException if the match or one of the
     *         sub-matchers cannot be cloned
     */
    protected Object clone() throws CloneNotSupportedException {
        MagicMatcher copy = new MagicMatcher();

        MagicMatch matchCopy = (MagicMatch) match.clone();
        copy.setMatch(matchCopy);

        // clone the children first, then hand them over in one go
        ArrayList clonedChildren = new ArrayList();

        for (Iterator it = subMatchers.iterator(); it.hasNext();) {
            MagicMatcher child = (MagicMatcher) it.next();
            clonedChildren.add(child.clone());
        }

        copy.setSubMatchers(clonedChildren);

        return copy;
    }
}