com.gc.iotools.fmt.detect.droid.DroidDetectorImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.gc.iotools.fmt.detect.droid.DroidDetectorImpl.java

Source

package com.gc.iotools.fmt.detect.droid;

/*
 * Copyright (c) 2008, 2014 Gabriele Contini. This source code is released
 * under the BSD License.
 */
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;

import uk.gov.nationalarchives.droid.base.FileFormatHit;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;
import uk.gov.nationalarchives.droid.binFileReader.IdentificationFile;
import uk.gov.nationalarchives.droid.binFileReader.RandomAccessByteReader;
import uk.gov.nationalarchives.droid.signatureFile.FFSignatureFile;
import uk.gov.nationalarchives.droid.signatureFile.FileFormat;
import uk.gov.nationalarchives.droid.signatureFile.InternalSignature;
import uk.gov.nationalarchives.droid.xmlReader.SAXModelBuilder;

import com.gc.iotools.fmt.base.DetectionLibrary;
import com.gc.iotools.fmt.base.FormatEnum;
import com.gc.iotools.fmt.base.FormatId;
import com.gc.iotools.fmt.base.ResettableInputStream;

/**
 * {@link DetectionLibrary} implementation that delegates format recognition
 * to the DROID signature engine.
 * <p>
 * Two classpath resources (resolved relative to this class) drive the
 * detector: a DROID signature file in XML format and a properties file whose
 * keys are format names and whose values list the DROID format ids that map
 * to that name.
 * </p>
 * <p>
 * Parsed signature files are cached by resource name in a static map shared
 * by all instances. That map is a plain {@link HashMap} and is accessed
 * without synchronization.
 * </p>
 * 
 * @author dvd.smnt
 */
public class DroidDetectorImpl implements DetectionLibrary {
    /**
     * Namespace for the xml file format signatures file.
     */
    private static final String SIGNATURE_FILE_NS = "http://www.nationalarchives.gov.uk/pronom/SignatureFile";
    /** Signature file used when the caller does not name one. */
    private static final String SIGNATURE_FILE = "DROID_SignatureFile_V18.xml";
    /** Mapping file used when the caller does not name one. */
    private static final String MAPPING_FILE = "mapping.properties";

    /** Parsed signature files, keyed by the resource name they were loaded from. */
    private static final Map<String, FFSignatureFile> CONF_MAP = new HashMap<String, FFSignatureFile>();
    private static final Logger LOG = LoggerFactory.getLogger(DroidDetectorImpl.class);

    private final Class<? extends FormatEnum> formatEnumClass;
    /** Key into {@link #CONF_MAP} for the signature file used by this instance. */
    private final String configFile;
    /** Format name -> list of DROID format ids. */
    private final Properties mapping;

    /**
     * Creates a detector that uses the default signature and mapping files.
     */
    public DroidDetectorImpl() {
        this(FormatEnum.class, SIGNATURE_FILE, MAPPING_FILE);
    }

    /**
     * Creates a detector with a custom enum class, signature file and mapping
     * file.
     * 
     * @param formatEnumClass
     *            class used to resolve mapped format names; <code>null</code>
     *            selects {@link FormatEnum}.
     * @param signatureFile
     *            classpath resource name of the DROID signature file; blank
     *            selects the default signature file.
     * @param mappingFileStr
     *            classpath resource name of the mapping properties; blank
     *            selects the default mapping file.
     * @throws IllegalArgumentException
     *             if the signature file or the mapping file cannot be found
     *             or read.
     * @throws IllegalStateException
     *             if the signature file cannot be parsed.
     */
    public DroidDetectorImpl(final Class<? extends FormatEnum> formatEnumClass, final String signatureFile,
            final String mappingFileStr) {
        final Class<? extends FormatEnum> clazz = (formatEnumClass == null ? FormatEnum.class : formatEnumClass);
        if (!(FormatEnum.class.isAssignableFrom(clazz))) {
            throw new IllegalArgumentException(
                    " [" + formatEnumClass + "] should be an subclass of [" + FormatEnum.class + "]");
        }
        this.formatEnumClass = clazz;
        this.configFile = StringUtils.isBlank(signatureFile) ? SIGNATURE_FILE : signatureFile;

        // Parse each signature file only once; later instances reuse the cached model.
        if (!CONF_MAP.containsKey(this.configFile)) {
            CONF_MAP.put(this.configFile, loadSignatureFile(this.configFile));
        }
        this.mapping = loadMapping(mappingFileStr);
    }

    /**
     * Locates the signature file on the classpath and parses it.
     * 
     * @param resourceName
     *            resource name, resolved relative to this class.
     * @return the parsed signature file.
     * @throws IllegalArgumentException
     *             if the resource is missing or a connection to it cannot be
     *             opened.
     */
    private FFSignatureFile loadSignatureFile(final String resourceName) {
        final URL confFile = DroidDetectorImpl.class.getResource(resourceName);
        try {
            if ((confFile == null) || (confFile.openConnection() == null)) {
                throw new IllegalArgumentException(
                        "Configuration file [" + resourceName + "] not found or not readable.");
            }
        } catch (final IOException e) {
            throw new IllegalArgumentException(
                    "Problem reading configuration file [" + resourceName + "] url[" + confFile + "]", e);
        }
        return parseSigFile(confFile);
    }

    /**
     * Loads the format-name to DROID-id mapping from the classpath.
     * 
     * @param mappingFileStr
     *            resource name, resolved relative to this class; blank selects
     *            the default mapping file.
     * @return the loaded properties.
     * @throws IllegalArgumentException
     *             if the resource is missing or cannot be read.
     */
    private static Properties loadMapping(final String mappingFileStr) {
        final String resource = StringUtils.isBlank(mappingFileStr) ? MAPPING_FILE : mappingFileStr;
        final InputStream mappingIs = DroidDetectorImpl.class.getResourceAsStream(resource);
        if (mappingIs == null) {
            // getResourceAsStream signals a missing resource with null, not an exception.
            throw new IllegalArgumentException("can't load resource[" + resource + "]");
        }
        final Properties loaded = new Properties();
        try {
            try {
                loaded.load(mappingIs);
            } finally {
                mappingIs.close();
            }
        } catch (final IOException e) {
            throw new IllegalArgumentException("can't load resource[" + resource + "]", e);
        }
        return loaded;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The stream is reset to its beginning, then identified against the
     * subset of signatures that belong to <code>enabledFormats</code>. The
     * first hit whose mapped format is enabled is returned; if there is none
     * the result has format {@link FormatEnum#UNKNOWN}.
     * </p>
     */
    public FormatId detect(final FormatEnum[] enabledFormats, final ResettableInputStream stream)
            throws IOException {
        final IdentificationFile idFile = new IdentificationFile("-");
        stream.resetToBeginning();
        final ByteReader testFile = new RandomAccessByteReader(idFile, stream);
        final FFSignatureFile fsigfile = CONF_MAP.get(this.configFile);
        final FFSignatureFile reduced = reduceDetectedSequences(fsigfile, enabledFormats);
        reduced.runFileIdentification(testFile);
        final int n = testFile.getNumHits();
        FormatId fenumId = new FormatId(FormatEnum.UNKNOWN, null);
        final Collection<FormatEnum> enabledFormatCollection = Arrays.asList(enabledFormats);
        // Stop at the first hit that maps to an enabled format.
        for (int i = 0; (i < n) && (FormatEnum.UNKNOWN.equals(fenumId.format)); i++) {
            final FileFormatHit ffhit = testFile.getHit(i);
            final FileFormat fileFormat = ffhit.getFileFormat();
            final FormatId tmpFid = getFormatEnum(fileFormat);
            if (FormatEnum.UNLISTED.equals(tmpFid.format)) {
                // For unlisted formats getFormatEnum reports the DROID name in the version slot.
                LOG.warn("Format number[{}] not found in configured mapping. format [{}] was returned as [{}] version[{}]",
                        new Object[] { fileFormat.getID(), fileFormat.getName(), FormatEnum.UNLISTED,
                                fileFormat.getName() });
            }
            if (enabledFormatCollection.contains(tmpFid.format)) {
                fenumId = tmpFid;
            }
        }
        return fenumId;
    }

    /**
     * Builds a signature file restricted to the enabled formats.
     * 
     * @param fsig
     *            the complete signature file.
     * @param enabled
     *            formats that should remain detectable.
     * @return a new signature file holding only the file formats that map to
     *         an enabled format and have at least one internal signature,
     *         together with those internal signatures.
     */
    private FFSignatureFile reduceDetectedSequences(final FFSignatureFile fsig, final FormatEnum[] enabled) {
        final Collection<FileFormat> fformats = new ArrayList<FileFormat>();
        final Collection<InternalSignature> intSigs = new ArrayList<InternalSignature>();
        final Collection<FormatEnum> enabledColl = new ArrayList<FormatEnum>(Arrays.asList(enabled));

        // Index the internal signatures by id so formats can look them up directly.
        final Map<Integer, InternalSignature> internalSignatureMap = new HashMap<Integer, InternalSignature>();
        final int numSignatures = fsig.getNumInternalSignatures();
        for (int i = 0; i < numSignatures; i++) {
            final InternalSignature intSignature = fsig.getInternalSignature(i);
            internalSignatureMap.put(intSignature.getID(), intSignature);
        }

        final int numFormats = fsig.getNumFileFormats();
        for (int i = 0; i < numFormats; i++) {
            final FileFormat ff = fsig.getFileFormat(i);
            final int numFormatSignatures = ff.getNumInternalSignatures();
            if (numFormatSignatures <= 0 || !enabledColl.contains(getFormatEnum(ff).format)) {
                continue;
            }
            fformats.add(ff);
            for (int j = 0; j < numFormatSignatures; j++) {
                final int intSigId = ff.getInternalSignatureID(j);
                final InternalSignature intSig = internalSignatureMap.get(intSigId);
                if (intSig != null) {
                    intSigs.add(intSig);
                } else {
                    // Report the id that was looked up; the signature itself is null here.
                    LOG.warn("Internal signature id[{}] not found.", intSigId);
                }
            }
        }
        return new FFSignatureFile(fformats, intSigs);
    }

    /**
     * {@inheritDoc}
     * <p>
     * One entry is produced for every file format of the signature file that
     * maps to a format other than {@link FormatEnum#UNLISTED} and
     * {@link FormatEnum#UNKNOWN}.
     * </p>
     */
    public FormatEnum[] getDetectedFormats() {
        final FFSignatureFile fsigfile = CONF_MAP.get(this.configFile);
        final Collection<FormatEnum> result = new ArrayList<FormatEnum>();
        final int numFormats = fsigfile.getNumFileFormats();
        for (int i = 0; i < numFormats; i++) {
            final FormatEnum fenum = getFormatEnum(fsigfile.getFileFormat(i)).format;
            if (!FormatEnum.UNLISTED.equals(fenum) && !FormatEnum.UNKNOWN.equals(fenum)) {
                result.add(fenum);
            }
        }
        return result.toArray(new FormatEnum[result.size()]);
    }

    /**
     * Translates a DROID file format into a {@link FormatId} using the
     * configured mapping.
     * 
     * @param id
     *            the DROID file format.
     * @return the mapped format with the DROID version, or
     *         {@link FormatEnum#UNLISTED} with the DROID format name when no
     *         mapping entry lists the format id.
     */
    private FormatId getFormatEnum(final FileFormat id) {
        final String droidId = Integer.toString(id.getID());
        FormatEnum fenum = null;
        for (final Object key : this.mapping.keySet()) {
            final String kname = (String) key;
            if (listsId(this.mapping.getProperty(kname), droidId)) {
                fenum = FormatEnum.getEnum(this.formatEnumClass, kname);
                break;
            }
        }
        if (fenum == null) {
            return new FormatId(FormatEnum.UNLISTED, id.getName());
        }
        return new FormatId(fenum, id.getVersion());
    }

    /**
     * Tells whether a mapping value lists the given id as a whole number.
     * A substring test would be wrong here: id "1" must not match "10,11".
     * Any run of non-digit characters is treated as a separator, so the
     * check does not depend on the exact delimiter used in the mapping file.
     * 
     * @param value
     *            mapping value, a delimited list of numeric ids.
     * @param droidId
     *            decimal representation of the id to look for.
     * @return <code>true</code> if one of the numeric tokens equals the id.
     */
    private static boolean listsId(final String value, final String droidId) {
        if (value == null) {
            return false;
        }
        for (final String token : value.split("[^0-9]+")) {
            if (token.equals(droidId)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Creates a namespace-aware SAX reader that feeds the given model builder.
     * 
     * @param mb
     *            builder that receives the SAX events; it is also configured
     *            with the signature file namespace.
     * @return the configured reader.
     * @throws Exception
     *             if the SAX parser cannot be created.
     */
    private XMLReader getXMLReader(final SAXModelBuilder mb) throws Exception {
        final SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        final SAXParser saxParser = factory.newSAXParser();
        final XMLReader parser = saxParser.getXMLReader();
        mb.setupNamespace(SIGNATURE_FILE_NS, true);
        parser.setContentHandler(mb);
        return parser;
    }

    /**
     * Parses a DROID signature file and prepares it for identification.
     * 
     * @param signatureFileURL
     *            location of the signature file.
     * @return the parsed signature file, after <code>prepareForUse()</code>
     *         has been called on it.
     * @throws IllegalStateException
     *             if the file cannot be read or parsed.
     */
    private FFSignatureFile parseSigFile(final URL signatureFileURL) {
        final SAXModelBuilder mb = new SAXModelBuilder();
        try {
            final XMLReader parser = getXMLReader(mb);
            final InputStream signatureFileStream = signatureFileURL.openStream();
            try {
                parser.parse(new InputSource(signatureFileStream));
            } finally {
                // Release the stream even when parsing fails.
                signatureFileStream.close();
            }
        } catch (final Exception e) {
            throw new IllegalStateException("Error reading configuration file " + "[" + signatureFileURL + "]", e);
        }
        final FFSignatureFile fsgf = (FFSignatureFile) mb.getModel();
        fsgf.prepareForUse();
        return fsgf;
    }
}