Java tutorial
/* jMimeMagic(TM) is a Java library for determining the content type of files or streams. Copyright (C) 2004 David Castro This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA For more information, please email arimus@users.sourceforge.net */ package ar.com.qbe.siniestros.model.utils.MimeMagic; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.oro.text.perl.Perl5Util; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; /** * This class represents a single match test * * @author $Author: arimus $ * @version $Revision: 1.1 $ */ public class MagicMatcher implements Cloneable { private static Log log = LogFactory.getLog(MagicMatcher.class); private ArrayList subMatchers = new ArrayList(0); private MagicMatch match = null; /** * constructor */ public MagicMatcher() { log.debug("instantiated"); } /** * DOCUMENT ME! * * @param match DOCUMENT ME! */ public void setMatch(MagicMatch match) { log.debug("setMatch()"); this.match = match; } /** * DOCUMENT ME! * * @return DOCUMENT ME! */ public MagicMatch getMatch() { log.debug("getMatch()"); return this.match; } /** * test to see if everything is in order for this match * * @return whether or not this match has enough data to be valid */ public boolean isValid() { log.debug("isValid()"); if ((match == null) || (match.getTest() == null)) { return false; } String type = new String(match.getTest().array()); char comparator = match.getComparator(); String description = match.getDescription(); String test = new String(match.getTest().array()); if ((type != null) && !type.equals("") && (comparator != '\0') && ((comparator == '=') || (comparator == '!') || (comparator == '>') || (comparator == '<')) && (description != null) && !description.equals("") && (test != null) && !test.equals("")) { return true; } return false; } /** * add a submatch to this magic match * * @param m a magic match */ public void addSubMatcher(MagicMatcher m) { log.debug("addSubMatcher()"); subMatchers.add(m); } /** * set all submatches * * @param a a collection of submatches */ public void setSubMatchers(Collection a) { log.debug("setSubMatchers(): for match '" + match.getDescription() + "'"); subMatchers.clear(); subMatchers.addAll(a); } /** * get all submatches for this magic match * * @return a collection of submatches */ public Collection getSubMatchers() { log.debug("getSubMatchers()"); return subMatchers; } /** * test to see if this match or any submatches match * * @param f the file that should be used to test the match * @param onlyMimeMatch DOCUMENT ME! * * @return the deepest magic match object that matched * * @throws IOException DOCUMENT ME! * @throws UnsupportedTypeException DOCUMENT ME! */ public MagicMatch test(File f, boolean onlyMimeMatch) throws IOException, UnsupportedTypeException { log.debug("test(File)"); int offset = match.getOffset(); String description = match.getDescription(); String type = match.getType(); String mimeType = match.getMimeType(); log.debug("test(File): testing '" + f.getName() + "' for '" + description + "'"); log.debug("test(File): \n=== BEGIN MATCH INFO =="); log.debug(match.print()); log.debug("test(File): \n=== END MATCH INFO ====\n"); RandomAccessFile file = null; file = new RandomAccessFile(f, "r"); try { int length = 0; if (type.equals("byte")) { length = 1; } else if (type.equals("short") || type.equals("leshort") || type.equals("beshort")) { length = 4; } else if (type.equals("long") || type.equals("lelong") || type.equals("belong")) { length = 8; } else if (type.equals("string")) { length = match.getTest().capacity(); } else if (type.equals("regex")) { final int matchLength = match.getLength(); length = (matchLength == 0) ? (int) file.length() - offset : matchLength; if (length < 0) { length = 0; } } else if (type.equals("detector")) { length = (int) file.length() - offset; if (length < 0) { length = 0; } } else { throw new UnsupportedTypeException("unsupported test type '" + type + "'"); } // we know this match won't work since there isn't enough data for the test if (length > (file.length() - offset)) { return null; } byte[] buf = new byte[length]; file.seek(offset); int bytesRead = 0; int size = 0; boolean gotAllBytes = false; boolean done = false; while (!done) { size = file.read(buf, 0, length - bytesRead); if (size == -1) { throw new IOException("reached end of file before all bytes were read"); } bytesRead += size; if (bytesRead == length) { gotAllBytes = true; done = true; } } log.debug("test(File): stream size is '" + buf.length + "'"); MagicMatch match = null; MagicMatch submatch = null; if (testInternal(buf)) { // set the top level match to this one try { match = getMatch() != null ? (MagicMatch) getMatch().clone() : null; } catch (CloneNotSupportedException e) { // noop } log.debug("test(File): testing matched '" + description + "'"); // set the data on this match if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) { log.debug( "test(File): testing " + subMatchers.size() + " submatches for '" + description + "'"); for (int i = 0; i < subMatchers.size(); i++) { log.debug("test(File): testing submatch " + i); MagicMatcher m = (MagicMatcher) subMatchers.get(i); if ((submatch = m.test(f, false)) != null) { log.debug("test(File): submatch " + i + " matched with '" + submatch.getDescription() + "'"); match.addSubMatch(submatch); } else { log.debug("test(File): submatch " + i + " doesn't match"); } } } } return match; } finally { try { file.close(); } catch (Exception fce) { } } } /** * test to see if this match or any submatches match * * @param data the data that should be used to test the match * @param onlyMimeMatch DOCUMENT ME! * * @return the deepest magic match object that matched * * @throws IOException DOCUMENT ME! * @throws UnsupportedTypeException DOCUMENT ME! */ public MagicMatch test(byte[] data, boolean onlyMimeMatch) throws IOException, UnsupportedTypeException { log.debug("test(byte[])"); int offset = match.getOffset(); String description = match.getDescription(); String type = match.getType(); String test = new String(match.getTest().array()); String mimeType = match.getMimeType(); log.debug("test(byte[]): testing byte[] data for '" + description + "'"); log.debug("test(byte[]): \n=== BEGIN MATCH INFO =="); log.debug(match.print()); log.debug("test(byte[]): \n=== END MATCH INFO ====\n"); int length = 0; if (type.equals("byte")) { length = 1; } else if (type.equals("short") || type.equals("leshort") || type.equals("beshort")) { length = 4; } else if (type.equals("long") || type.equals("lelong") || type.equals("belong")) { length = 8; } else if (type.equals("string")) { length = match.getTest().capacity(); } else if (type.equals("regex")) { // FIXME - something wrong here, shouldn't have to subtract 1??? length = data.length - offset - 1; if (length < 0) { length = 0; } } else if (type.equals("detector")) { // FIXME - something wrong here, shouldn't have to subtract 1??? length = data.length - offset - 1; if (length < 0) { length = 0; } } else { throw new UnsupportedTypeException("unsupported test type " + type); } byte[] buf = new byte[length]; log.debug("test(byte[]): offset=" + offset + ",length=" + length + ",data length=" + data.length); if ((offset + length) < data.length) { System.arraycopy(data, offset, buf, 0, length); log.debug("test(byte[]): stream size is '" + buf.length + "'"); MagicMatch match = null; MagicMatch submatch = null; if (testInternal(buf)) { // set the top level match to this one try { match = getMatch() != null ? (MagicMatch) getMatch().clone() : null; } catch (CloneNotSupportedException e) { // noop } log.debug("test(byte[]): testing matched '" + description + "'"); // set the data on this match if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) { log.debug("test(byte[]): testing " + subMatchers.size() + " submatches for '" + description + "'"); for (int i = 0; i < subMatchers.size(); i++) { log.debug("test(byte[]): testing submatch " + i); MagicMatcher m = (MagicMatcher) subMatchers.get(i); if ((submatch = m.test(data, false)) != null) { log.debug("test(byte[]): submatch " + i + " matched with '" + submatch.getDescription() + "'"); match.addSubMatch(submatch); } else { log.debug("test(byte[]): submatch " + i + " doesn't match"); } } } } return match; } else { return null; } } /** * internal test switch * * @param data DOCUMENT ME! * @return DOCUMENT ME! */ private boolean testInternal(byte[] data) { log.debug("testInternal(byte[])"); if (data.length == 0) { return false; } String type = match.getType(); String test = new String(match.getTest().array()); String mimeType = match.getMimeType(); String description = match.getDescription(); ByteBuffer buffer = ByteBuffer.allocate(data.length); if ((type != null) && (test != null) && (test.length() > 0)) { if (type.equals("string")) { buffer = buffer.put(data); return testString(buffer); } else if (type.equals("byte")) { buffer = buffer.put(data); return testByte(buffer); } else if (type.equals("short")) { buffer = buffer.put(data); return testShort(buffer); } else if (type.equals("leshort")) { buffer = buffer.put(data); buffer.order(ByteOrder.LITTLE_ENDIAN); return testShort(buffer); } else if (type.equals("beshort")) { buffer = buffer.put(data); buffer.order(ByteOrder.BIG_ENDIAN); return testShort(buffer); } else if (type.equals("long")) { buffer = buffer.put(data); return testLong(buffer); } else if (type.equals("lelong")) { buffer = buffer.put(data); buffer.order(ByteOrder.LITTLE_ENDIAN); return testLong(buffer); } else if (type.equals("belong")) { buffer = buffer.put(data); buffer.order(ByteOrder.BIG_ENDIAN); return testLong(buffer); } else if (type.equals("regex")) { return testRegex(new String(data)); } else if (type.equals("detector")) { buffer = buffer.put(data); return testDetector(buffer); // } else if (type.equals("date")) { // return testDate(data, BIG_ENDIAN); // } else if (type.equals("ledate")) { // return testDate(data, LITTLE_ENDIAN); // } else if (type.equals("bedate")) { // return testDate(data, BIG_ENDIAN); } else { log.error("testInternal(byte[]): invalid test type '" + type + "'"); } } else { log.error("testInternal(byte[]): type or test is empty for '" + mimeType + " - " + description + "'"); } return false; } /** * test the data against the test byte * * @param data the data we are testing * * @return if we have a match */ private boolean testByte(ByteBuffer data) { log.debug("testByte()"); String test = new String(match.getTest().array()); char comparator = match.getComparator(); long bitmask = match.getBitmask(); String s = test; byte b = data.get(0); b = (byte) (b & bitmask); log.debug("testByte(): decoding '" + test + "' to byte"); int tst = Integer.decode(test).byteValue(); byte t = (byte) (tst & 0xff); log.debug("testByte(): applying bitmask '" + bitmask + "' to '" + tst + "', result is '" + t + "'"); log.debug("testByte(): comparing byte '" + b + "' to '" + t + "'"); switch (comparator) { case '=': return t == b; case '!': return t != b; case '>': return t > b; case '<': return t < b; } return false; } /** * test the data against the byte array * * @param data the data we are testing * * @return if we have a match */ private boolean testString(ByteBuffer data) { log.debug("testString()"); ByteBuffer test = match.getTest(); char comparator = match.getComparator(); byte[] b = data.array(); byte[] t = test.array(); boolean diff = false; int i = 0; for (i = 0; i < t.length; i++) { log.debug("testing byte '" + b[i] + "' from '" + new String(data.array()) + "' against byte '" + t[i] + "' from '" + new String(test.array()) + "'"); if (t[i] != b[i]) { diff = true; break; } } switch (comparator) { case '=': return !diff; case '!': return diff; case '>': return t[i] > b[i]; case '<': return t[i] < b[i]; } return false; } /** * test the data against a short * * @param data the data we are testing * * @return if we have a match */ private boolean testShort(ByteBuffer data) { log.debug("testShort()"); short val = 0; String test = new String(match.getTest().array()); char comparator = match.getComparator(); long bitmask = match.getBitmask(); val = byteArrayToShort(data); // apply bitmask before the comparison val = (short) (val & (short) bitmask); short tst = 0; try { tst = Integer.decode(test).shortValue(); } catch (NumberFormatException e) { log.error("testShort(): " + e); return false; //if (test.length() == 1) { // tst = new Integer(Character.getNumericValue(test.charAt(0))).shortValue(); //} } log.debug("testShort(): testing '" + Long.toHexString(val) + "' against '" + Long.toHexString(tst) + "'"); switch (comparator) { case '=': return val == tst; case '!': return val != tst; case '>': return val > tst; case '<': return val < tst; } return false; } /** * test the data against a long * * @param data the data we are testing * * @return if we have a match */ private boolean testLong(ByteBuffer data) { log.debug("testLong()"); long val = 0; String test = new String(match.getTest().array()); char comparator = match.getComparator(); long bitmask = match.getBitmask(); val = byteArrayToLong(data); // apply bitmask before the comparison val = val & bitmask; long tst = Long.decode(test).longValue(); log.debug("testLong(): testing '" + Long.toHexString(val) + "' against '" + test + "' => '" + Long.toHexString(tst) + "'"); switch (comparator) { case '=': return val == tst; case '!': return val != tst; case '>': return val > tst; case '<': return val < tst; } return false; } /** * test the data against a regex * * @param text the data we are testing * * @return if we have a match */ private boolean testRegex(String text) { log.debug("testRegex()"); String test = new String(match.getTest().array()); char comparator = match.getComparator(); Perl5Util utility = new Perl5Util(); log.debug("testRegex(): searching for '" + test + "'"); if (comparator == '=') { if (utility.match(test, text)) { return true; } else { return false; } } else if (comparator == '!') { if (utility.match(test, text)) { return false; } else { return true; } } return false; } /** * test the data using a detector * * @param data the data we are testing * * @return if we have a match */ private boolean testDetector(ByteBuffer data) { log.debug("testDetector()"); String detectorClass = new String(match.getTest().array()); try { log.debug("loading class: " + detectorClass); Class c = Class.forName(detectorClass); MagicDetector detector = (MagicDetector) c.newInstance(); String[] types = detector.process(data.array(), match.getOffset(), match.getLength(), match.getBitmask(), match.getComparator(), match.getMimeType(), match.getProperties()); if ((types != null) && (types.length > 0)) { // the match object has no mime type set, so set from the detector class processing match.setMimeType(types[0]); return true; } } catch (ClassNotFoundException e) { log.error("failed to load detector: " + detectorClass, e); } catch (InstantiationException e) { log.error("specified class is not a valid detector class: " + detectorClass, e); } catch (IllegalAccessException e) { log.error("specified class cannot be accessed: " + detectorClass, e); } return false; } /** * Get the extensions for the underlying detectory * * @return DOCUMENT ME! */ public String[] getDetectorExtensions() { log.debug("testDetector()"); String detectorClass = new String(match.getTest().array()); try { log.debug("loading class: " + detectorClass); Class c = Class.forName(detectorClass); MagicDetector detector = (MagicDetector) c.newInstance(); return detector.getHandledTypes(); } catch (ClassNotFoundException e) { log.error("failed to load detector: " + detectorClass, e); } catch (InstantiationException e) { log.error("specified class is not a valid detector class: " + detectorClass, e); } catch (IllegalAccessException e) { log.error("specified class cannot be accessed: " + detectorClass, e); } return new String[0]; } /** * encode a byte as an octal string * * @param b a byte of data * * @return an octal representation of the byte data */ private String byteToOctalString(byte b) { int n1; int n2; int n3; n1 = (b / 32) & 7; n2 = (b / 8) & 7; n3 = b & 7; return String.valueOf(n1) + String.valueOf(n2) + String.valueOf(n3); } /** * convert a byte array to a short * * @param data buffer of byte data * * @return byte array converted to a short */ private short byteArrayToShort(ByteBuffer data) { return data.getShort(0); } /** * convert a byte array to a long * * @param data buffer of byte data * * @return byte arrays (high and low bytes) converted to a long value */ private long byteArrayToLong(ByteBuffer data) { return (long) data.getInt(0); } /** * DOCUMENT ME! * * @return DOCUMENT ME! * * @throws CloneNotSupportedException DOCUMENT ME! */ protected Object clone() throws CloneNotSupportedException { MagicMatcher clone = new MagicMatcher(); clone.setMatch((MagicMatch) match.clone()); Iterator i = subMatchers.iterator(); ArrayList sub = new ArrayList(); while (i.hasNext()) { MagicMatcher m = (MagicMatcher) i.next(); sub.add(m.clone()); } clone.setSubMatchers(sub); return clone; } }