nl.mpi.tla.lat2fox.Main.java Source code

Java tutorial

Introduction

Here is the source code for nl.mpi.tla.lat2fox.Main.java

Source

/*
 * Copyright (C) 2014 The Language Archive - Max Planck Institute for Psycholinguistics
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package nl.mpi.tla.lat2fox;

import java.io.File;
import java.util.Collection;
import java.util.List;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmDestination;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XsltExecutable;
import net.sf.saxon.s9api.XsltTransformer;
import nl.mpi.tla.schemanon.Message;
import nl.mpi.tla.schemanon.SchemAnon;
import nl.mpi.tla.schemanon.SaxonUtils;
import org.apache.commons.io.FileUtils;

/**
 * Command line entry point of lat2fox.
 *
 * <p>Recurses a source directory for CMD records, transforms each record into a FOX
 * file with the {@code /cmd2fox.xsl} stylesheet loaded from the classpath, and
 * optionally distributes the results over numbered subdirectories and validates them
 * against {@code /foxml1-1.xsd}. All options are described by {@link #showHelp()}.
 *
 * @author Menzo Windhouwer
 */
public class Main {

    /** Absolute paths matching this pattern (corpman or sessions directories) are not converted. */
    private static final java.util.regex.Pattern SKIPPED_PATH = java.util.regex.Pattern
            .compile(".*/(corpman|sessions)/.*");

    /**
     * Prints the usage information to standard error.
     */
    private static void showHelp() {
        System.err.println("INF: lat2fox <options> -- <DIR>?");
        System.err.println("INF: <DIR>      source directory to recurse for CMD files (default: .)");
        System.err.println("INF: lat2fox options:");
        System.err.println("INF: -e=<EXT>   the extension of CMD records (default: cmdi)");
        System.err.println("INF: -r=<FILE>  load/store the relations map from/in this <FILE> (optional)");
        System.err.println("INF: -f=<DIR>   directory to store the FOX files (default: ./fox)");
        System.err.println("INF: -x=<DIR>   directory to store the FOX files with problems (default: ./fox-error)");
        System.err.println("INF: -p=<DIR>   directory to pickup policies (default: ./policies)");
        System.err.println("INF: -q=<DIR>   directory to pickup management info (default: ./management)");
        System.err.println("INF: -i=<DIR>   replace source <DIR> by this <DIR> in the FOX files (optional)");
        System.err.println(
                "INF: -n=<NUM>   create subdirectories to contain <NUM> FOX files (default: 0, i.e., no subdirectories)");
        System.err.println("INF: -c=<FILE>  file containing the mapping to collections (optional)");
        System.err
                .println("INF: -d=<FILE>  stylesheet containing the mapping from CMD to Dublin Core (recommended)");
        System.err.println(
                "INF: -m=<FILE>  stylesheet containing the mapping from CMD to other (non CMD and non DC) metadata formats (optional)");
        System.err.println(
                "INF: -o=<XPATH> XPath 2.0 expressions determining if the CMD should be offered via OAI-PMH");
        System.err.println(
                "INF: -a=<XPATH> XPath 2.0 expressions determining if the DO should always be a collection and a compound");
        System.err.println("INF: -s=<NAME>  name of the server/repository used by OAI");
        System.err.println("INF: -b=<DIR>   directory where the icons are stored (default: /app/flat/icons)");
        System.err.println("INF: -v         validate the FOX files (optional)");
        System.err.println("INF: -l         lax check if a local resource exists (optional)");
        System.err.println(
                "INF: -h         don't create a CMD object as first child of the compound, but include the CMD in the compound itself (optional)");
        System.err.println("INF: -z         skip the relations check");
    }

    /**
     * Reports a command line problem, prints the usage information and terminates the
     * JVM with exit status 1.
     *
     * @param message the complete line to print to standard error before the usage
     */
    private static void usageError(String message) {
        System.err.println(message);
        showHelp();
        System.exit(1);
    }

    /**
     * Returns the argument of a string option, or a fallback when the option is absent.
     *
     * @param options  the parsed command line
     * @param opt      the option letter
     * @param fallback the value to return when the option was not given
     * @return the option argument or {@code fallback}
     */
    private static String stringOption(OptionSet options, String opt, String fallback) {
        return options.has(opt) ? (String) options.valueOf(opt) : fallback;
    }

    /**
     * Returns the argument of an option that has to name an existing, readable file.
     * Terminates with a usage error when the argument is not a file or is not readable.
     *
     * @param options the parsed command line
     * @param opt     the option letter; it is also used in the error messages
     * @return the path exactly as given on the command line
     */
    private static String readableFileOption(OptionSet options, String opt) {
        String path = (String) options.valueOf(opt);
        File file = new File(path);
        if (!file.isFile()) {
            usageError("FTL: -" + opt + " expects a <FILE> argument!");
        }
        if (!file.canRead()) {
            usageError("FTL: -" + opt + " <FILE> argument isn't readable!");
        }
        return path;
    }

    /**
     * Writes a document to a file using an identity transformation.
     *
     * @param node   the document to write
     * @param target the file to (over)write
     * @throws Exception when the transformer can't be created or the write fails
     */
    private static void serialize(XdmNode node, File target) throws Exception {
        TransformerFactory.newInstance().newTransformer().transform(node.asSource(), new StreamResult(target));
    }

    /**
     * Provides the relations lookup document. An existing {@code rfile} is loaded as
     * is; otherwise the document is produced by {@code /cmd2rels.xsl} and, when
     * {@code rfile} is given, stored in that file for later runs.
     *
     * @param rfile the relations file, may be {@code null}
     * @param cext  the extension of the CMD records
     * @param dir   the source directory
     * @return the relations document
     * @throws Exception when loading, transforming or storing fails
     */
    private static XdmNode loadOrBuildRelations(File rfile, String cext, String dir) throws Exception {
        if (rfile != null && rfile.exists()) {
            XdmNode loaded = SaxonUtils.buildDocument(new StreamSource(rfile));
            System.err.println("DBG: loaded[" + rfile.getAbsolutePath() + "]");
            return loaded;
        }
        // create lookup document for relations
        XsltTransformer rels = SaxonUtils.buildTransformer(Main.class.getResource("/cmd2rels.xsl")).load();
        rels.setParameter(new QName("ext"), new XdmAtomicValue(cext));
        rels.setParameter(new QName("dir"), new XdmAtomicValue("file:" + dir));
        rels.setSource(new StreamSource(Main.class.getResource("/null.xml").toString()));
        XdmDestination dest = new XdmDestination();
        rels.setDestination(dest);
        rels.transform();
        XdmNode built = dest.getXdmNode();
        if (rfile != null) {
            serialize(built, rfile);
            System.err.println("DBG: saved[" + rfile.getAbsolutePath() + "]");
        }
        return built;
    }

    /**
     * Runs {@code /checkRels.xsl} with the relations document as parameter. The
     * transformation result itself is discarded.
     *
     * @param relsDoc the relations document
     * @throws Exception when the stylesheet can't be compiled or run
     */
    private static void checkRelations(XdmNode relsDoc) throws Exception {
        XsltTransformer rcheck = SaxonUtils.buildTransformer(Main.class.getResource("/checkRels.xsl")).load();
        rcheck.setParameter(new QName("rels-doc"), relsDoc);
        rcheck.setSource(new StreamSource(Main.class.getResource("/null.xml").toString()));
        rcheck.setDestination(new XdmDestination());
        rcheck.transform();
    }

    /**
     * Compiles the CMD to FOX stylesheet. When a CMD to DC and/or CMD to other
     * stylesheet is given, {@code /cmd2fox.xsl} is first run through
     * {@code /inclCMD2DCother.xsl}, which receives the locations of those stylesheets.
     *
     * @param dfile path of the CMD to Dublin Core stylesheet, may be {@code null}
     * @param mfile path of the CMD to other metadata stylesheet, may be {@code null}
     * @return the compiled stylesheet
     * @throws Exception when a stylesheet can't be compiled or run
     */
    private static XsltExecutable buildCmd2Fox(String dfile, String mfile) throws Exception {
        if (dfile == null && mfile == null) {
            return SaxonUtils.buildTransformer(Main.class.getResource("/cmd2fox.xsl"));
        }
        XsltTransformer inclCMD2DC = SaxonUtils.buildTransformer(Main.class.getResource("/inclCMD2DCother.xsl"))
                .load();
        inclCMD2DC.setSource(new StreamSource(Main.class.getResource("/cmd2fox.xsl").toString()));
        // File.toURI() yields a well-formed file URI, also for paths containing spaces
        // or drive letters, which plain "file://" + path concatenation does not.
        if (dfile != null) {
            inclCMD2DC.setParameter(new QName("cmd2dc"), new XdmAtomicValue(new File(dfile).toURI().toString()));
        }
        if (mfile != null) {
            inclCMD2DC.setParameter(new QName("cmd2other"),
                    new XdmAtomicValue(new File(mfile).toURI().toString()));
        }
        XdmDestination destination = new XdmDestination();
        inclCMD2DC.setDestination(destination);
        inclCMD2DC.transform();
        return SaxonUtils.buildTransformer(destination.getXdmNode());
    }

    /**
     * Moves the XML files found below the FOX directory into numbered subdirectories
     * ({@code 1}, {@code 2}, ...) that each receive at most {@code ndir} files.
     *
     * @param fdir the FOX directory
     * @param ndir the maximum number of files per subdirectory, larger than 0
     * @throws Exception when a file can't be moved
     */
    private static void distributeOverSubdirectories(String fdir, int ndir) throws Exception {
        Collection<File> foxFiles = FileUtils.listFiles(new File(fdir), new String[] { "xml" }, true);
        int total = foxFiles.size();
        int position = 0;
        int inBatch = 0;
        int batch = 0;
        for (File foxFile : foxFiles) {
            position++;
            if (inBatch == ndir) {
                inBatch = 0;
            }
            inBatch++;
            boolean startsBatch = (inBatch == 1);
            if (startsBatch) {
                batch++;
            }
            FileUtils.moveFileToDirectory(foxFile, new File(fdir + "/" + batch), true);
            if (startsBatch) {
                System.err.println("DBG:" + position + "/" + total + ": moved to dir[" + fdir + "/" + batch + "]");
            }
        }
    }

    /**
     * Validates the XML files found below the FOX directory against
     * {@code /foxml1-1.xsd}. The verdict per file is written to standard error, the
     * individual validation messages to standard output.
     *
     * @param fdir the FOX directory
     * @throws Exception when the validator can't be set up or run
     */
    private static void validateFoxFiles(String fdir) throws Exception {
        SchemAnon tron = new SchemAnon(Main.class.getResource("/foxml1-1.xsd"), "ingest");
        Collection<File> foxFiles = FileUtils.listFiles(new File(fdir), new String[] { "xml" }, true);
        int total = foxFiles.size();
        int position = 0;
        for (File foxFile : foxFiles) {
            position++;
            if (tron.validate(foxFile)) {
                System.err.println(
                        "DBG:" + position + "/" + total + ": valid file[" + foxFile.getAbsolutePath() + "]");
                continue;
            }
            System.err
                    .println("ERR:" + position + "/" + total + ": invalid file[" + foxFile.getAbsolutePath() + "]");
            for (Message msg : tron.getMessages()) {
                String prefix = (msg.isError() ? "ERR: " : "WRN: ") + position + "/" + total + ": ";
                System.out.println(prefix + (msg.getLocation() != null ? "at " + msg.getLocation() : ""));
                System.out.println(prefix + msg.getText());
            }
        }
    }

    /**
     * Runs the conversion.
     *
     * <p>The JVM is terminated with status 0 after printing help ({@code -?}) and with
     * status 1 on an invalid command line or a fatal error. Problems with an individual
     * CMD record are logged and the record is skipped.
     *
     * @param args the command line, see {@link #showHelp()}
     */
    public static void main(String[] args) {
        // check command line
        OptionParser parser = new OptionParser("zhlve:r:f:i:x:p:q:n:c:d:m:o:a:s:b:?*");
        OptionSet options;
        try {
            options = parser.parse(args);
        } catch (RuntimeException e) {
            // joptsimple reports a malformed command line with an unchecked exception
            usageError("FTL: invalid command line: " + e.getMessage());
            return;
        }
        // help wins over everything else, so it also works next to invalid options
        if (options.has("?")) {
            showHelp();
            System.exit(0);
        }

        boolean laxResourceCheck = options.has("l");
        boolean validateFOX = options.has("v");
        boolean createCMDObject = !options.has("h");
        boolean relationCheck = !options.has("z");
        String cext = stringOption(options, "e", "cmdi");
        File rfile = options.has("r") ? new File((String) options.valueOf("r")) : null;
        String fdir = stringOption(options, "f", null);
        String idir = stringOption(options, "i", null);
        String xdir = stringOption(options, "x", null);
        String pdir = stringOption(options, "p", null);
        String mdir = stringOption(options, "q", null);
        String bdir = stringOption(options, "b", null);

        XdmNode collsDoc = null;
        if (options.has("c")) {
            String cfile = readableFileOption(options, "c");
            try {
                collsDoc = SaxonUtils.buildDocument(new StreamSource(cfile));
            } catch (Exception ex) {
                System.err.println("FTL: can't read collection <FILE>[" + cfile + "]: " + ex);
                ex.printStackTrace(System.err);
                // the message announces a fatal problem, so don't continue without the map
                System.exit(1);
            }
        }
        String dfile = options.has("d") ? readableFileOption(options, "d") : null;
        String mfile = options.has("m") ? readableFileOption(options, "m") : null;

        int ndir = 0;
        if (options.has("n")) {
            try {
                ndir = Integer.parseInt((String) options.valueOf("n"));
            } catch (NumberFormatException e) {
                usageError("FTL: -n expects a numeric argument!");
            }
        }
        String oxp = stringOption(options, "o", null);
        String axp = stringOption(options, "a", null);
        String server = stringOption(options, "s", null);

        String dir = ".";
        List<?> arg = options.nonOptionArguments();
        if (arg.size() > 1) {
            usageError("FTL: only one source <DIR> argument is allowed!");
        }
        if (arg.size() == 1) {
            dir = (String) arg.get(0);
        }

        try {
            SaxonExtensionFunctions.registerAll(SaxonUtils.getProcessor().getUnderlyingConfiguration());
        } catch (Exception e) {
            System.err.println("ERR: couldn't register the Saxon extension functions: " + e);
            e.printStackTrace(System.err);
        }

        try {
            // directories that were not given explicitly live below the source directory
            if (fdir == null) {
                fdir = dir + "/fox";
            }
            if (xdir == null) {
                xdir = dir + "/fox-error";
            }
            if (pdir == null) {
                pdir = dir + "/policies";
            }
            if (mdir == null) {
                mdir = dir + "/management";
            }

            XdmNode relsDoc = loadOrBuildRelations(rfile, cext, dir);
            if (relationCheck) {
                checkRelations(relsDoc);
            }

            // CMDI 2 FOX
            // create the fox dirs
            FileUtils.forceMkdir(new File(fdir));
            FileUtils.forceMkdir(new File(xdir));
            Collection<File> inputs = FileUtils.listFiles(new File(dir), new String[] { cext }, true);
            // if there is a CMD 2 DC or 2 other XSLT include it
            XsltExecutable cmd2fox = buildCmd2Fox(dfile, mfile);

            int err = 0;
            int i = 0;
            int s = inputs.size();
            for (File input : inputs) {
                i++;
                if (input.isHidden() || SKIPPED_PATH.matcher(input.getAbsolutePath()).matches()) {
                    continue;
                }
                try {
                    XsltTransformer fox = cmd2fox.load();
                    fox.setParameter(new QName("rels-doc"), relsDoc);
                    fox.setParameter(new QName("conversion-base"), new XdmAtomicValue(dir));
                    if (idir != null) {
                        fox.setParameter(new QName("import-base"), new XdmAtomicValue(idir));
                    }
                    fox.setParameter(new QName("fox-base"), new XdmAtomicValue(fdir));
                    fox.setParameter(new QName("lax-resource-check"), new XdmAtomicValue(laxResourceCheck));
                    if (collsDoc != null) {
                        fox.setParameter(new QName("collections-map"), collsDoc);
                    }
                    if (server != null) {
                        fox.setParameter(new QName("repository"), new XdmAtomicValue(server));
                    }
                    if (oxp != null) {
                        fox.setParameter(new QName("oai-include-eval"), new XdmAtomicValue(oxp));
                    }
                    if (axp != null) {
                        // one expression drives both the collection and the compound decision
                        fox.setParameter(new QName("always-collection-eval"), new XdmAtomicValue(axp));
                        fox.setParameter(new QName("always-compound-eval"), new XdmAtomicValue(axp));
                    }
                    if (bdir != null) {
                        fox.setParameter(new QName("icon-base"), new XdmAtomicValue(bdir));
                    }
                    // pdir and mdir always have a value here because of the defaults above
                    fox.setParameter(new QName("policies-dir"), new XdmAtomicValue(pdir));
                    fox.setParameter(new QName("management-dir"), new XdmAtomicValue(mdir));
                    fox.setParameter(new QName("create-cmd-object"), new XdmAtomicValue(createCMDObject));
                    fox.setSource(new StreamSource(input));
                    XdmDestination destination = new XdmDestination();
                    fox.setDestination(destination);
                    fox.transform();

                    // the file name is derived from the PID of the generated FOX
                    String fid = SaxonUtils.evaluateXPath(destination.getXdmNode(), "/*/@PID").evaluateSingle()
                            .getStringValue();
                    File out = new File(fdir + "/" + fid.replaceAll("[^a-zA-Z0-9]", "_") + "_CMD.xml");
                    if (out.exists()) {
                        // never overwrite an earlier FOX file, park the newcomer in the error directory
                        System.err.println(
                                "ERR:" + i + "/" + s + ": FOX[" + out.getAbsolutePath() + "] already exists!");
                        out = new File(xdir + "/lat-error-" + (++err) + ".xml");
                        System.err.println(
                                "WRN:" + i + "/" + s + ": saved to FOX[" + out.getAbsolutePath() + "] instead!");
                    }
                    serialize(destination.getXdmNode(), out);
                    System.err.println("DBG:" + i + "/" + s + ": created[" + out.getAbsolutePath() + "]");
                } catch (Exception e) {
                    // a broken record must not stop the conversion of the other records
                    System.err.println("ERR:" + i + "/" + s + ": " + e);
                    System.err.println("WRN:" + i + "/" + s + ": skipping file[" + input.getAbsolutePath() + "]");
                }
            }

            if (ndir > 0) {
                distributeOverSubdirectories(fdir, ndir);
            }
            if (validateFOX) {
                validateFoxFiles(fdir);
            }
        } catch (Exception ex) {
            System.err.println("FTL: " + ex);
            ex.printStackTrace(System.err);
            // let callers see that the run failed
            System.exit(1);
        }
    }
}