Java tutorial
/* Copyright 2014 Universidad Politcnica de Madrid - Center for Open Middleware (http://www.centeropenmiddleware.com) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package com.c4om.jschematronvalidator; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Reader; import java.io.Writer; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactoryConfigurationError; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.commons.cli.BasicParser; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.io.input.XmlStreamReader; import org.apache.commons.io.output.XmlStreamWriter; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.w3c.dom.Document; import org.xml.sax.InputSource; import 
org.xml.sax.SAXException;
import com.c4om.jschematronvalidator.xslt2.SaxonXSLT2BasedValidationMethod;

/**
 * Command-line entry point of JSchematronValidator.
 * <p>
 * It parses the command line, loads the Schematron schema and the candidate XML document
 * (from a file or from standard input), runs the validation through a
 * {@link SaxonXSLT2BasedValidationMethod} and prints the resulting SVRL document to a file
 * or to standard output. It also exposes a few static helpers to load
 * {@link org.w3c.dom.Document} objects.
 *
 * @author Pablo Alonso Rodriguez (Center for Open Middleware - UPM)
 */
// "static-access" is suppressed because the OptionBuilder calls in the static initializer
// chain static methods through the instances returned by the previous call.
@SuppressWarnings("static-access")
public class JSchematronValidatorMain {

    /**
     * {@link Logger} for this class
     */
    private static final Logger LOGGER = LogManager.getLogger();

    /**
     * Short option to display help
     */
    private static final String CMD_LINE_OPTION_HELP = "h";

    /**
     * Short option to choose the XSLT2 validation method (the default one)
     */
    private static final String CMD_LINE_OPTION_XSLT2 = "x2";

    /**
     * Short option for the output
     */
    private static final String CMD_LINE_OPTION_OUTPUT = "o";

    /**
     * Short option for skip generate fired rule.
     */
    private static final String CMD_LINE_OPTION_SKIP_GENERATE_FIRED_RULE = "sfr";

    /**
     * Short option for skip generate paths.
     */
    private static final String CMD_LINE_OPTION_SKIP_GENERATE_PATHS = "sp";

    /**
     * Short option for skip diagnose.
     */
    private static final String CMD_LINE_OPTION_SKIP_DIAGNOSE = "sd";

    /**
     * Short option for allow foreign.
     */
    private static final String CMD_LINE_OPTION_ALLOW_FOREIGN = "af";

    /**
     * Short option to choose the validation phase.
     */
    private static final String CMD_LINE_OPTION_PHASE = "ph";

    /**
     * Short option to indicate the schematron input file.
     */
    private static final String CMD_LINE_OPTION_SCHEMATRON_FILE = "sch";

    /**
     * Short option to indicate the XML input file.
     */
    private static final String CMD_LINE_OPTION_INPUT = "i";

    /**
     * Short option to indicate that errors due to unavailable optional files (like those
     * imported by XSLT <code>document()</code> function) should not be printed.
     */
    private static final String CMD_LINE_OPTION_SKIP_OPTIONAL_FILES_ERRORS = "sofe";

    /**
     * Command line syntax.
     */
    private static final String CMD_LINE_SYNTAX = "JSchematronValidator {--help | other arguments}";

    /**
     * Command line options for this program.
     * @see Options
     */
    private static final Options CMD_LINE_OPTIONS;

    static {
        // create the Options
        CMD_LINE_OPTIONS = new Options();
        CMD_LINE_OPTIONS.addOption(CMD_LINE_OPTION_HELP, "help", false, "Show this help message");
        CMD_LINE_OPTIONS.addOption(CMD_LINE_OPTION_XSLT2, "xslt2", false,
                "This option chooses the XSLT2 validation method (the only one currently availabe, so actually it has no effect).");
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("inputFile").hasArg()
                .withDescription("The input document to be validated (if absent, standard input will be read)")
                .withArgName("inputXMLFile").create(CMD_LINE_OPTION_INPUT));
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("schemaFile").hasArg()
                .withDescription("The input schema to validate against.").withArgName("inputSchemaFile")
                .create(CMD_LINE_OPTION_SCHEMATRON_FILE));
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("phase").hasArg()
                .withDescription("The validation phase").withArgName("phase").create(CMD_LINE_OPTION_PHASE));
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("allow-foreign").withDescription(
                "Do not remove non-Schematron elements and rich markup during intermediate validation steps.")
                .create(CMD_LINE_OPTION_ALLOW_FOREIGN));
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("skip-diagnose")
                .withDescription("Do not print sch:diagnostics messages to output SVRL.")
                .create(CMD_LINE_OPTION_SKIP_DIAGNOSE));
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("skip-generate-paths")
                .withDescription("Do not generate the @location attribute with XPaths")
                .create(CMD_LINE_OPTION_SKIP_GENERATE_PATHS));
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("skip-generate-fired-rule")
                .withDescription("Do not generate svrl:fired-rule elements")
                .create(CMD_LINE_OPTION_SKIP_GENERATE_FIRED_RULE));
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("skip-optional-files-errors")
                .withDescription("Do not print errors due to unavailable optional files")
                .create(CMD_LINE_OPTION_SKIP_OPTIONAL_FILES_ERRORS));
        // The argument name shown in the help used to be "inputXMLFile" (copy-paste from the
        // input option); it names the output file, so the help now says so.
        CMD_LINE_OPTIONS.addOption(OptionBuilder.withLongOpt("outputFile").hasArg().withDescription(
                "The location where the output SVRL document will be saved (if absent, it will be printed to standard output)")
                .withArgName("outputSVRLFile").create(CMD_LINE_OPTION_OUTPUT));
    }

    /**
     * Reads a {@link org.w3c.dom.Document} from an {@link InputSource} using a
     * namespace-aware {@link DocumentBuilder}.
     * <p>
     * The input source is not closed by this method.
     *
     * @param inputSource the input source
     * @return the read {@link Document}
     * @throws ParserConfigurationException if a {@link DocumentBuilder} cannot be created
     * @throws SAXException if the content cannot be parsed
     * @throws IOException if reading from the input source fails
     */
    public static Document loadW3CDocumentFromInputSource(InputSource inputSource)
            throws ParserConfigurationException, SAXException, IOException {
        // NOTE(review): the factory is used with its default settings (no features are set
        // here), so DTDs and external entities are presumably still processed. If this helper
        // is ever fed untrusted XML, consider hardening the factory against XXE - confirm
        // first that no schema/instance in use relies on DTDs.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder documentBuilder = factory.newDocumentBuilder();
        return documentBuilder.parse(inputSource);
    }

    /**
     * Reads a {@link org.w3c.dom.Document} from an {@link InputStream}. The encoding is
     * detected by an {@link XmlStreamReader} created in lenient mode.
     * <p>
     * The stream belongs to the caller and is not closed by this method.
     *
     * @param is the input stream.
     * @return the read {@link Document}
     * @throws ParserConfigurationException if a {@link DocumentBuilder} cannot be created
     * @throws SAXException if the content cannot be parsed
     * @throws IOException if reading from the stream fails
     */
    public static Document loadW3CDocumentFromInputStream(InputStream is)
            throws ParserConfigurationException, SAXException, IOException {
        Reader xmlStreamReader = new XmlStreamReader(is, true);
        return loadW3CDocumentFromInputSource(new InputSource(xmlStreamReader));
    }

    /**
     * Reads a {@link org.w3c.dom.Document} from an {@link InputStream}. The encoding is
     * detected by an {@link XmlStreamReader} created in lenient mode, with the given
     * encoding as default.
     * <p>
     * The stream belongs to the caller and is not closed by this method.
     *
     * @param is the input stream.
     * @param encoding the fallback encoding, if the actual one could not be determined
     * @return the read {@link Document}
     * @throws ParserConfigurationException if a {@link DocumentBuilder} cannot be created
     * @throws SAXException if the content cannot be parsed
     * @throws IOException if reading from the stream fails
     */
    public static Document loadW3CDocumentFromInputStream(InputStream is, String encoding)
            throws ParserConfigurationException, SAXException, IOException {
        Reader xmlStreamReader = new XmlStreamReader(is, true, encoding);
        return loadW3CDocumentFromInputSource(new InputSource(xmlStreamReader));
    }

    /**
     * Reads a {@link org.w3c.dom.Document} from a {@link File}. The encoding is detected by
     * an {@link XmlStreamReader}.
     * <p>
     * The reader opened on the file is closed before this method returns, whether parsing
     * succeeds or fails.
     *
     * @param file a {@link File} object.
     * @return the read {@link Document}
     * @throws ParserConfigurationException if a {@link DocumentBuilder} cannot be created
     * @throws SAXException if the content cannot be parsed
     * @throws IOException if the file cannot be opened or read
     */
    public static Document loadW3CDocumentFromInputFile(File file)
            throws ParserConfigurationException, SAXException, IOException {
        // This method opens the file itself, so it is also responsible for closing it.
        try (Reader xmlStreamReader = new XmlStreamReader(file)) {
            return loadW3CDocumentFromInputSource(new InputSource(xmlStreamReader));
        }
    }

    /**
     * Prints a {@link org.w3c.dom.Document} to an {@link OutputStream}, in UTF-8.
     * The stream is flushed but not closed.
     *
     * @param outputDocument The output document
     * @param outputStream The output stream
     * @throws TransformerFactoryConfigurationError if no {@link TransformerFactory} is available
     * @throws TransformerConfigurationException if the {@link Transformer} cannot be created
     * @throws TransformerException if the serialization fails
     * @throws IOException if flushing the output fails
     */
    private static void printW3CDocumentToOutputStream(Document outputDocument, OutputStream outputStream)
            throws TransformerFactoryConfigurationError, TransformerConfigurationException,
            TransformerException, IOException {
        printW3CDocumentToOutputStream(outputDocument, outputStream, "UTF-8");
    }

    /**
     * Prints a {@link org.w3c.dom.Document} to an {@link OutputStream} as indented XML.
     * The stream is flushed but not closed, so it is safe to pass {@link System#out}.
     *
     * @param outputDocument The output document
     * @param outputStream The output stream
     * @param charset the charset.
     * @throws TransformerFactoryConfigurationError if no {@link TransformerFactory} is available
     * @throws TransformerConfigurationException if the {@link Transformer} cannot be created
     * @throws TransformerException if the serialization fails
     * @throws IOException if flushing the output fails
     */
    private static void printW3CDocumentToOutputStream(Document outputDocument, OutputStream outputStream,
            String charset) throws TransformerFactoryConfigurationError, TransformerConfigurationException,
            TransformerException, IOException {
        LOGGER.info("Printing W3C Document to an output stream with encoding '{}'", charset);
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.ENCODING, charset);
        LOGGER.debug("XML output properties: {}", transformer.getOutputProperties());
        DOMSource source = new DOMSource(outputDocument);
        Writer outputWriter = new XmlStreamWriter(outputStream, charset);
        StreamResult result = new StreamResult(outputWriter);
        transformer.transform(source, result);
        // The writer wraps the caller's stream and is deliberately not closed (the stream may
        // be System.out). Flush it explicitly so that nothing it may have buffered is lost
        // when the caller closes the underlying stream.
        outputWriter.flush();
        LOGGER.info("Document printed");
    }

    /**
     * Main method, executed at application startup.
     * <p>
     * Exits with status 1 if the command line cannot be parsed or no schematron file is
     * given, and with status 0 after printing the help.
     *
     * @param args CLI arguments (for more details, just run the program with the --help option).
     * @throws Exception if any exception is thrown anywhere
     */
    public static void main(String[] args) throws Exception {
        LOGGER.info("Program starts");
        CommandLine cmdLine;
        try {
            // create the command line parser
            CommandLineParser parser = new BasicParser();
            // parse the command line arguments
            cmdLine = parser.parse(CMD_LINE_OPTIONS, args);
        } catch (ParseException e) {
            String fatalMessage = "Error at parsing command line: " + e.getMessage();
            LOGGER.fatal(fatalMessage);
            System.err.println(fatalMessage);
            System.exit(1);
            // System.exit() makes this statement unreachable at runtime. However, 'return' is
            // needed so that the compiler accepts the use of 'cmdLine' after the try-catch.
            return;
        }

        if (cmdLine.hasOption(CMD_LINE_OPTION_HELP)) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(CMD_LINE_SYNTAX, CMD_LINE_OPTIONS);
            System.exit(0);
        }

        if (!cmdLine.hasOption(CMD_LINE_OPTION_SCHEMATRON_FILE)) {
            System.err.println("Error at parsing command line: No input schematron is provided.");
            System.exit(1);
        }

        Document inputSchematronDocument = loadW3CDocumentFromInputFile(
                new File(cmdLine.getOptionValue(CMD_LINE_OPTION_SCHEMATRON_FILE)));

        // The candidate document comes from the -i file if given, otherwise from standard input.
        Document candidateDocument;
        if (cmdLine.hasOption(CMD_LINE_OPTION_INPUT)) {
            try (InputStream candidateInputStream = new FileInputStream(
                    new File(cmdLine.getOptionValue(CMD_LINE_OPTION_INPUT)))) {
                candidateDocument = loadW3CDocumentFromInputStream(candidateInputStream);
            }
        } else {
            candidateDocument = loadW3CDocumentFromInputStream(System.in);
        }

        String phase = cmdLine.getOptionValue(CMD_LINE_OPTION_PHASE);
        boolean allowForeign = cmdLine.hasOption(CMD_LINE_OPTION_ALLOW_FOREIGN);
        // The "skip-*" switches are negated: each feature is on unless its switch is present.
        boolean diagnose = !cmdLine.hasOption(CMD_LINE_OPTION_SKIP_DIAGNOSE);
        boolean generatePaths = !cmdLine.hasOption(CMD_LINE_OPTION_SKIP_GENERATE_PATHS);
        boolean generateFiredRule = !cmdLine.hasOption(CMD_LINE_OPTION_SKIP_GENERATE_FIRED_RULE);
        boolean ignoreFileNotAvailableAtDocumentFunction = cmdLine
                .hasOption(CMD_LINE_OPTION_SKIP_OPTIONAL_FILES_ERRORS);

        ValidationMethod currentSchematronValidationMethod = new SaxonXSLT2BasedValidationMethod(
                ignoreFileNotAvailableAtDocumentFunction, allowForeign, diagnose, generatePaths,
                generateFiredRule);
        Document svrlResult = currentSchematronValidationMethod.performValidation(candidateDocument,
                inputSchematronDocument, phase);

        if (cmdLine.hasOption(CMD_LINE_OPTION_OUTPUT)) {
            // The output file is opened here, so it is closed here as well (even on failure).
            try (OutputStream outputStream = new FileOutputStream(
                    new File(cmdLine.getOptionValue(CMD_LINE_OPTION_OUTPUT)))) {
                printW3CDocumentToOutputStream(svrlResult, outputStream);
            }
        } else {
            // System.out is intentionally left open.
            printW3CDocumentToOutputStream(svrlResult, System.out);
        }
    }
}