nl.mpi.tla.isle2clarin.Main.java Source code

Java tutorial

Introduction

Here is the source code for nl.mpi.tla.isle2clarin.Main.java

Source

/*
 * Copyright (C) 2014 The Language Archive - Max Planck Institute for Psycholinguistics
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package nl.mpi.tla.isle2clarin;

import eu.clarin.cmdi.validator.CMDIValidationHandlerAdapter;
import eu.clarin.cmdi.validator.CMDIValidationReport;
import eu.clarin.cmdi.validator.CMDIValidator;
import eu.clarin.cmdi.validator.CMDIValidatorConfig;
import eu.clarin.cmdi.validator.CMDIValidatorException;
import eu.clarin.cmdi.validator.SimpleCMDIValidatorProcessor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import nl.mpi.tla.schemanon.Message;
import nl.mpi.tla.schemanon.SchemAnon;
import nl.mpi.translation.tools.Translator;
import nl.mpi.translation.tools.TranslatorImpl;
import org.apache.commons.io.FileUtils;

/**
 * Command line tool that converts IMDI files to CMDI files.
 *
 * <p>The input is either a directory that is searched recursively for
 * {@code *.imdi} files, or a text file listing one input path per line. Each
 * input is optionally validated against the bundled IMDI 3.0 schema, converted,
 * written next to the input with a {@code .cmdi} extension and optionally
 * validated as CMDI. All diagnostics are written to the console with a
 * {@code DBG:}/{@code WRN:}/{@code ERR:}/{@code FTL:}/{@code INF:} prefix.
 *
 * @author Menzo Windhouwer
 */
public class Main {

    /**
     * Parses the command line and converts all selected IMDI files.
     *
     * <p>A failure while processing a single file is reported and does not stop
     * the run; a failure during set up is reported with an {@code FTL:} prefix.
     * Usage errors terminate the JVM with exit status 1, the {@code -?} option
     * prints the help and terminates with exit status 0.
     *
     * @param args command line arguments, see {@link #showHelp()}
     */
    public static void main(String[] args) {
        try {
            // initialize CMDI2IMDI
            Translator imdi2cmdi = new TranslatorImpl();
            SchemAnon tron = new SchemAnon(Main.class.getResource("/IMDI_3.0.xsd"));
            // check command line
            OptionParser parser = new OptionParser("ics:?*");
            OptionSet options = parser.parse(args);
            boolean validateIMDI = options.has("i");
            boolean validateCMDI = options.has("c");
            Set<String> skip = new TreeSet<>();
            if (options.has("s")) {
                skip = loadSkipList((String) options.valueOf("s"));
            }
            if (options.has("?")) {
                showHelp();
                System.exit(0);
                return;
            }
            List<?> arg = options.nonOptionArguments();
            // exactly one <INPUT>, optionally followed by the (deprecated) skip list <FILE>
            if (arg.size() < 1 || arg.size() > 2) {
                fatal("FTL: none or too many non-option arguments!");
                return;
            }
            if (arg.size() > 1) {
                if (options.has("s")) {
                    fatal("FTL: -s option AND <FILE> argument, use only one!");
                    return;
                }
                skip = loadSkipList((String) arg.get(1));
            }
            String inputArg = (String) arg.get(0);
            File in = new File(inputArg);
            Collection<File> inputs;
            if (in.isDirectory()) {
                inputs = FileUtils.listFiles(in, new String[] { "imdi" }, true);
            } else if (in.isFile()) {
                inputs = loadInputList(in);
            } else {
                fatal("FTL: unknown type of <INPUT>!");
                return;
            }
            // skip list entries may be given relative to <INPUT>
            String base = in.getAbsolutePath() + "/";
            int i = 0;
            int s = inputs.size();
            for (File input : inputs) {
                i++;
                String tag = i + "/" + s;
                try {
                    String path = input.getAbsolutePath();
                    String reason = skipReason(input, path, base, skip);
                    if (reason != null) {
                        System.err.println("WRN:" + tag + ": file[" + path + "] " + reason + ", skipping it.");
                        continue;
                    }
                    System.err.println(
                            "DBG:" + tag + ": convert file[" + stripPrefix(path, inputArg + "/") + "]");
                    if (validateIMDI) {
                        validateImdi(tron, input, tag);
                    }
                    File output = convertToCmdi(imdi2cmdi, input);
                    System.err.println("DBG:" + tag + ": wrote   file[" + output.getAbsolutePath() + "]");
                    if (validateCMDI) {
                        validateCmdi(output);
                    }
                } catch (Exception ex) {
                    // a broken file must not stop the conversion of the remaining files
                    System.err.println("ERR:" + tag + ":" + input + ":" + ex);
                    ex.printStackTrace(System.err);
                }
            }
        } catch (Exception ex) {
            System.err.println("FTL: " + ex);
            ex.printStackTrace(System.err);
        }
    }

    /** Prints a fatal usage error followed by the help text and terminates the JVM with status 1. */
    private static void fatal(String message) {
        System.err.println(message);
        showHelp();
        System.exit(1);
    }

    /**
     * Returns {@code path} without the leading {@code prefix}, or {@code path}
     * unchanged when it does not start with it. The prefix is compared literally,
     * so characters that are special in regular expressions are safe.
     */
    private static String stripPrefix(String path, String prefix) {
        return path.startsWith(prefix) ? path.substring(prefix.length()) : path;
    }

    /**
     * Decides whether an input file has to be skipped.
     *
     * @param input the input file
     * @param path  absolute path of {@code input}
     * @param base  absolute path of the {@code <INPUT>} argument including a trailing slash
     * @param skip  skip list entries, either absolute or relative to {@code base}
     * @return the reason to report, or {@code null} when the file has to be processed
     */
    private static String skipReason(File input, String path, String base, Set<String> skip) {
        if (input.isHidden()) {
            return "is hidden";
        }
        if (path.matches(".*/(corpman|sessions)/.*")) {
            return "is in a corpman or sessions dir";
        }
        if (skip.contains(stripPrefix(path, base)) || skip.contains(path)) {
            return "is in the skip list";
        }
        return null;
    }

    /**
     * Validates an IMDI file and reports the outcome; for an invalid file every
     * validation message is printed as a location line followed by a text line.
     *
     * @param tron  the validator to use
     * @param input the IMDI file
     * @param tag   progress indicator ({@code index/total}) used in the output
     * @throws Exception when the validator fails
     */
    private static void validateImdi(SchemAnon tron, File input, String tag) throws Exception {
        if (tron.validate(input)) {
            System.err.println("DBG:" + tag + ": valid file[" + input.getAbsolutePath() + "]");
            return;
        }
        System.err.println("ERR:" + tag + ": invalid file[" + input.getAbsolutePath() + "]");
        for (Message msg : tron.getMessages()) {
            String level = (msg.isError() ? "ERR: " : "WRN: ") + tag + ": ";
            System.out.println(level + (msg.getLocation() != null ? "at " + msg.getLocation() : ""));
            System.out.println(level + msg.getText());
        }
    }

    /**
     * Converts one IMDI file and writes the result next to it, with the
     * {@code .imdi} extension replaced by {@code .cmdi}.
     *
     * @param imdi2cmdi the translator to use
     * @param input     the IMDI file
     * @return the written CMDI file
     * @throws Exception when the input has no {@code .imdi} extension, the
     *                   translation fails or the output can't be written
     */
    private static File convertToCmdi(Translator imdi2cmdi, File input) throws Exception {
        String inPath = input.getAbsolutePath();
        String outPath = inPath.replaceAll("\\.imdi$", ".cmdi");
        if (outPath.equals(inPath)) {
            // without the extension the output would overwrite the input itself
            throw new IOException("file[" + inPath + "] has no .imdi extension, refusing to overwrite it");
        }
        Map<String, Object> params = new HashMap<>();
        params.put("formatCMDI", Boolean.FALSE);
        imdi2cmdi.setTransformationParameters(params);
        // translate before opening the output, so a failed translation doesn't leave an empty file behind
        String cmdi = imdi2cmdi.getCMDI(input.toURI().toURL(), "");
        File output = new File(outPath);
        // UTF-8 is the default encoding of XML documents; the platform charset would depend on the machine.
        // NOTE(review): assumes the translator doesn't declare another encoding in the XML declaration - confirm
        try (PrintWriter out = new PrintWriter(output, "UTF-8")) {
            out.print(cmdi);
            // PrintWriter never throws on write errors, they have to be polled
            if (out.checkError()) {
                throw new IOException("failed to write file[" + outPath + "]");
            }
        }
        return output;
    }

    /**
     * Validates a CMDI file; the outcome is reported by {@link Handler}.
     *
     * @param output the CMDI file
     * @throws Exception when the validator can't be set up or fails
     */
    private static void validateCmdi(File output) throws Exception {
        CMDIValidatorConfig.Builder builder = new CMDIValidatorConfig.Builder(output, new Handler());
        CMDIValidator validator = new CMDIValidator(builder.build());
        SimpleCMDIValidatorProcessor processor = new SimpleCMDIValidatorProcessor();
        processor.process(validator);
    }

    /** Prints the usage information to standard error. */
    private static void showHelp() {
        System.err.println("INF: isle2clarin <options> -- <INPUT> <FILE>?");
        System.err.println(
                "INF: <INPUT>   directory to recurse for IMDI files, or file with file paths (one per line) to process");
        System.err.println(
                "INF: <FILE>    file with file paths (one per line) to skip during processing (deprecated, better use the -s option)");
        System.err.println("INF: isle2clarin options:");
        System.err.println("INF: -i        enable IMDI validation (optional)");
        System.err.println("INF: -c        enable CMDI validation (optional)");
        System.err
                .println("INF: -s=<FILE> file with file paths (one per line) to skip during processing (optional)");
    }

    /**
     * Loads the paths to skip from a text file with one path per line. Lines are
     * trimmed, blank lines are ignored and every loaded entry is echoed to
     * standard error. The file is read with the platform default charset.
     *
     * @param file path of the skip list file
     * @return the loaded entries; empty when the file doesn't exist
     * @throws Exception when the file can't be read
     */
    private static TreeSet<String> loadSkipList(String file) throws Exception {
        TreeSet<String> skip = new TreeSet<>();
        File sfile = new File(file);
        if (!sfile.exists()) {
            System.err.println(
                    "WRN: skip list[" + sfile.getAbsolutePath() + "] doesn't exist, nothing will be skipped.");
            return skip;
        }
        try (BufferedReader sin = new BufferedReader(new InputStreamReader(new FileInputStream(sfile)))) {
            String line;
            while ((line = sin.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                System.err.println("DBG: skip[" + line + "]");
                skip.add(line);
            }
        }
        return skip;
    }

    /**
     * Loads the files to process from a text file with one path per line. Lines
     * are trimmed and blank lines are ignored; a listed file that doesn't exist
     * is reported and left out. The file is read with the platform default charset.
     *
     * @param file the file with the input paths
     * @return the existing input files, sorted and without duplicates
     * @throws Exception when the file can't be read
     */
    private static Collection<File> loadInputList(File file) throws Exception {
        TreeSet<File> input = new TreeSet<>();
        if (!file.exists()) {
            return input;
        }
        try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
            String line;
            while ((line = in.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                File inf = new File(line);
                if (inf.exists()) {
                    input.add(inf);
                } else {
                    System.err.println("ERR: file[" + inf.getAbsolutePath() + "] doesn't exist!");
                }
            }
        }
        return input;
    }

    /**
     * Reports the outcome of a CMDI validation on standard error.
     */
    private static class Handler extends CMDIValidationHandlerAdapter {

        /** Fragment of the messages that are counted instead of printed. */
        private static final String UNREADABLE_SCHEMA = "Failed to read schema document ''";

        /**
         * Prints whether the validated file is valid, valid with warnings or
         * invalid, followed by the individual messages in the latter two cases.
         *
         * @param report the validation report of one file
         * @throws CMDIValidatorException when the highest severity of the report is
         *                                not one of INFO, WARNING and ERROR
         */
        @Override
        public void onValidationReport(final CMDIValidationReport report) throws CMDIValidatorException {
            final File file = report.getFile();
            int skip = 0;
            switch (report.getHighestSeverity()) {
            case INFO:
                System.err.println("DBG: file[" + file + "] is valid");
                break;
            case WARNING:
                System.err.println("WRN: file [" + file + "] is valid (with warnings):");
                skip = printMessages(report);
                break;
            case ERROR:
                System.err.println("ERR: file [" + file + "] is invalid:");
                skip = printMessages(report);
                break;
            default:
                throw new CMDIValidatorException("unexpected severity: " + report.getHighestSeverity());
            } // switch
            if (skip > 0) {
                System.err.println(
                        "WRN: skipped [" + skip + "] warnings due to lax validation of foreign namespaces");
            }
        }

        /**
         * Prints the messages of a report, with their position when both the line
         * and the column number are known.
         *
         * @param report the validation report of one file
         * @return the number of messages that were suppressed because they contain
         *         {@link #UNREADABLE_SCHEMA}
         */
        private static int printMessages(CMDIValidationReport report) {
            int skipped = 0;
            for (CMDIValidationReport.Message msg : report.getMessages()) {
                if (msg.getMessage().contains(UNREADABLE_SCHEMA)) {
                    skipped++;
                    continue;
                }
                String text = " (" + msg.getSeverity().getShortcut() + ") " + msg.getMessage();
                if ((msg.getLineNumber() != -1) && (msg.getColumnNumber() != -1)) {
                    text += " [line=" + msg.getLineNumber() + ", column=" + msg.getColumnNumber() + "]";
                }
                System.err.println(text);
            }
            return skipped;
        }
    } // class Handler
}