Java tutorial
/* ---------------------------------------------------------------------- * * Copyright (c) 2002-2009 The MITRE Corporation * * Except as permitted below * ALL RIGHTS RESERVED * * The MITRE Corporation (MITRE) provides this software to you without * charge to use for your internal purposes only. Any copy you make for * such purposes is authorized provided you reproduce MITRE's copyright * designation and this License in any such copy. You may not give or * sell this software to any other party without the prior written * permission of the MITRE Corporation. * * The government of the United States of America may make unrestricted * use of this software. * * This software is the copyright work of MITRE. No ownership or other * proprietary interest in this software is granted you other than what * is granted in this license. * * Any modification or enhancement of this software must inherit this * license, including its warranty disclaimers. You hereby agree to * provide to MITRE, at no charge, a copy of any such modification or * enhancement without limitation. * * MITRE IS PROVIDING THE PRODUCT "AS IS" AND MAKES NO WARRANTY, EXPRESS * OR IMPLIED, AS TO THE ACCURACY, CAPABILITY, EFFICIENCY, * MERCHANTABILITY, OR FUNCTIONING OF THIS SOFTWARE AND DOCUMENTATION. IN * NO EVENT WILL MITRE BE LIABLE FOR ANY GENERAL, CONSEQUENTIAL, * INDIRECT, INCIDENTAL, EXEMPLARY OR SPECIAL DAMAGES, EVEN IF MITRE HAS * BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. * * You accept this software on the condition that you indemnify and hold * harmless MITRE, its Board of Trustees, officers, agents, and * employees, from any and all liability or damages to third parties, * including attorneys' fees, court costs, and other related costs and * expenses, arising out of your use of this software irrespective of the * cause of said liability. * * The export from the United States or the subsequent reexport of this * software is subject to compliance with United States export control * and munitions control restrictions. You agree that in the event you * seek to export this software you assume full responsibility for * obtaining all necessary export licenses and approvals and for assuring * compliance with applicable reexport restrictions. * * ---------------------------------------------------------------------- * * NOTICE * * This software was produced for the U. S. Government * under Contract No. W15P7T-09-C-F600, and is * subject to the Rights in Noncommercial Computer Software * and Noncommercial Computer Software Documentation * Clause 252.227-7014 (JUN 1995). * * (c) 2009 The MITRE Corporation. All Rights Reserved. * * ---------------------------------------------------------------------- * */ /* * Copyright (c) 2002-2006 The MITRE Corporation * * Except as permitted below * ALL RIGHTS RESERVED * * The MITRE Corporation (MITRE) provides this software to you without * charge to use for your internal purposes only. Any copy you make for * such purposes is authorized provided you reproduce MITRE's copyright * designation and this License in any such copy. You may not give or * sell this software to any other party without the prior written * permission of the MITRE Corporation. * * The government of the United States of America may make unrestricted * use of this software. * * This software is the copyright work of MITRE. No ownership or other * proprietary interest in this software is granted you other than what * is granted in this license. * * Any modification or enhancement of this software must inherit this * license, including its warranty disclaimers. You hereby agree to * provide to MITRE, at no charge, a copy of any such modification or * enhancement without limitation. * * MITRE IS PROVIDING THE PRODUCT "AS IS" AND MAKES NO WARRANTY, EXPRESS * OR IMPLIED, AS TO THE ACCURACY, CAPABILITY, EFFICIENCY, * MERCHANTABILITY, OR FUNCTIONING OF THIS SOFTWARE AND DOCUMENTATION. IN * NO EVENT WILL MITRE BE LIABLE FOR ANY GENERAL, CONSEQUENTIAL, * INDIRECT, INCIDENTAL, EXEMPLARY OR SPECIAL DAMAGES, EVEN IF MITRE HAS * BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. * * You accept this software on the condition that you indemnify and hold * harmless MITRE, its Board of Trustees, officers, agents, and * employees, from any and all liability or damages to third parties, * including attorneys' fees, court costs, and other related costs and * expenses, arising out of your use of this software irrespective of the * cause of said liability. * * The export from the United States or the subsequent reexport of this * software is subject to compliance with United States export control * and munitions control restrictions. You agree that in the event you * seek to export this software you assume full responsibility for * obtaining all necessary export licenses and approvals and for assuring * compliance with applicable reexport restrictions. */ package org.mitre.jawb.io; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.InputStream; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URI; import java.net.URL; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.dom4j.Attribute; import org.dom4j.Branch; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentType; import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; import org.xml.sax.*; import org.xml.sax.ext.LexicalHandler; import org.xml.sax.helpers.DefaultHandler; import javax.xml.parsers.SAXParserFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import org.mitre.jawb.Jawb; import org.mitre.jawb.gui.GUIUtils; import org.mitre.jawb.tasks.Task; /** * Utilities to help with ATLAS files. Note that the methods of this class are * <strong>not</strong> thread safe. */ public class ATLASHelper { public static final String SIGNAL_DATA = "CALLISTO::SIGNAL::DATA"; public static final String SIGNAL_CHECKSUM = "CALLISTO::SIGNAL::CHECKSUM"; public static final int DEBUG = 0; /** Set cleared and re-used on each call to {@link #getSupportingTasks} */ private static List supportingTasks = null; /** SAX Handler that retrieves the URI of the MAIA Scheme of an .aif file */ private static ATLASSaxHandler saxHandler = null; /** SAX parser that retrieves the URI of the MAIA Scheme of an .aif file */ private static SAXParser saxParser = null; /** * Deterimine the Tasks which support the specified AIF file, based on the * MAIA schema it uses, and return references to those Tasks as a * List. Generally this will only return one of the tasks supplied (if * any!), but nothing restricts multiple Tasks from working with the same * MAIA scheme. If the file is not an AIF file, <code>null</code> is * returned to indicate such.<p> * * This method reuses the List object returned, so don't store reference to * it, and don't expect this to be thread safe.<p> * * @param aifURI the <i>absolute</i> URI of an aif file. URI is used to * ensure proper encoding, and to access the data it is * converted to URL. * * @return A List of Task objects which support the specified AIF file. It * may be empty if no Task support the AIF file, or * <code>null</code> to indicate that the file is not an AIF * file. * * @throws IllegalArgumentException If aifURI is null or relative (non-URL) * @throws IOException if there was an error retrieving the URL of the MAIA * Scheme from the .aif file * * @see URI#toURL */ public static List getSupportingTasks(URI aifURI) throws IOException { if (aifURI == null || !aifURI.isAbsolute()) throw new IllegalArgumentException("AIF URI is not absolute: " + aifURI); if (saxHandler == null) initSAXHandler(); saxParse(aifURI.toString(), saxHandler); String maiaString = saxHandler.getMaiaURIString(); if (DEBUG > 0) System.err.println("ATHelp.getSupTask: maia=" + maiaString); if (maiaString == null) // invalid .aif file return null; supportingTasks.clear(); // Task task = findTask (maiaString, EXTERNAL); // //if (task != null) // supportingTasks.add (task); // //return supportingTasks; // RK 10/14/05 Actually find all tasks instead of just one return findTasks(maiaString, EXTERNAL); } public static void saxParse(String uriString, ATLASSaxHandler saxHandler) throws IOException { saxHandler.reset(); try { // Parse the input URI if (DEBUG > 0) System.err.println("ATHelp.saxParse: parsing..."); saxParser.parse(uriString, saxHandler); } catch (EndOfProcessingException eope) { // done in good form! (exception forces parser to quit early) } catch (SAXException sxe) { // Error generated by this application Exception x = sxe; if (sxe.getException() != null) x = sxe.getException(); throw new RuntimeException(sxe.getMessage(), x); } } /** * Read in an .aif file from the specified URI, and write it out with * localized refernces to the output stream. * @param aifURI location of input .aif file. <strong>MUST BE ABSOLUTE.</strong> * @param out stream that localized version of input is written to * @param cheatMap A map of undocumented values that we use in Callisto * to store data in the AIF which ATLAS won't. */ public static void localize(URI aifURI, OutputStream out, Map cheatMap) throws IOException { if (DEBUG > 0) System.err.println("ATHelp.localize: aifURI=" + aifURI); Document doc = parse(aifURI); DocumentType doctype = doc.getDocType(); Element corpus = doc.getRootElement(); Element signal = getTextSignal(corpus); // Replace external ATLAS DTD reference w/ local reference URI localDTD = URLUtils.badURLToURI(Jawb.getResource("aif.dtd")); doctype.setSystemID(localDTD.toString()); // Retrieve 'Cononical' MAIA Scheme and replace w/ local reference String maiaString = corpus.attributeValue("schemeLocation"); Task task = findTask(maiaString, EXTERNAL); if (task == null) { // it could be an old file that still has a local MAIA URL String escapedMaia = maiaString.replaceAll(" ", "%20"); if ((task = findTask(escapedMaia, LOCAL)) == null) throw new RuntimeException("Unrecognized Task: MAIA URI=" + maiaString); } corpus.addAttribute("schemeLocation", task.getLocalMaiaURI().toString()); // It's possible to /not/ have a text signal referenced if (signal != null) { // If signal is relative URI, convert to absolute, // resolving against .aif file String signalHREF = signal.attributeValue("href"); try { String path = aifURI.getRawPath(); URI aifBase = aifURI.resolve(path.substring(0, path.lastIndexOf('/') + 1)); URI signalURI = new URI(signalHREF); URI resolvedURI = aifBase.resolve(signalURI); if (DEBUG > 0) { System.err.println("ATHelp.localize:\n base= " + aifBase + "\n signal= " + signalURI + "\n resolved= " + resolvedURI); } signal.addAttribute("href", resolvedURI.toString()); } catch (URISyntaxException x) { System.err.println("WARNING: aif file specifies invalid signal URI:" + " not resolving:\n aifURI= " + aifURI + "\n signalURI=" + signalHREF); System.err.println(x.getMessage()); } // ATLAS ignores encoding so use the cheats cheatMap.put("encoding", signal.attributeValue("encoding")); cheatMap.put("mimeType", signal.attributeValue("mimeType")); cheatMap.put(SIGNAL_CHECKSUM, signal.attributeValue("checksum")); Element body = signal.element("body"); if (body != null) { String signalEncoding = body.attributeValue("encoding"); if (!"Base64".equalsIgnoreCase(signalEncoding)) System.err.println("Unrecognized embeded signal encoding: '" + signalEncoding + "'"); else { String embedded = body.getText(); cheatMap.put(SIGNAL_DATA, Base64.decode(embedded)); } } } // if (signal != null) dump(doc, out); } /** * Create a temp file with similar name, and in the same directory as the * specified URI. This is just a wrapper around {@link File#createTempFile} * with some added fluff. Prefix is the name of the 'file' URI, and the * suffix is the suffix of the input file or "~". * * @throws IllegalArgumentException if uri is not absolute or a 'file' URI */ public static final File createTempFile(URI uri) throws IOException { return createTempFile(new File(uri)); } /** @see #createTempFile (URI) */ public static final File createTempFile(File base) throws IOException { String name = base.getName(); int extPos = name.lastIndexOf('.'); if (extPos < 0) extPos = name.length(); File tmp = File.createTempFile(name.substring(0, extPos), name.substring(extPos) + "~", base.getParentFile()); return tmp; } /** * Read in an .aif file from the specified URI, and write it out with * localized refernces to the output stream. * @param aifURI location of input .aif file. <strong>MUST BE ABSOLUTE.</strong> * @param out stream that localized version of input is written to * @param relativize rewrite the absolute signal URI as relative based on * input * @param cheatMap A map of undocumented values that we use in Callisto * to store data in the AIF which ATLAS won't. */ public static void externalize(URI aifURI, OutputStream out, boolean relativize, Map cheatMap) throws IOException { if (DEBUG > 0) System.err.println("ATHelp.externalize: aifURI=" + aifURI); Document doc = parse(aifURI); DocumentType doctype = doc.getDocType(); Element corpus = doc.getRootElement(); Element signal = getTextSignal(corpus); // Replace local ATLAS DTD reference w/ external reference doctype.setSystemID("http://www.nist.gov/speech/atlas/aif.dtd"); // Replace local MAIA Scheme w/ external reference String maiaString = corpus.attributeValue("schemeLocation"); Task task = findTask(maiaString, LOCAL); if (task == null) System.err.println("Unable to extern Maia: Unknown:\n " + maiaString); else corpus.addAttribute("schemeLocation", task.getMaiaURI().toString()); // It's possible to /not/ have a text signal referenced if (signal != null) { // Perhaps replace absolute URI with relative URI if (relativize) { String signalHREF = signal.attributeValue("href"); try { String path = aifURI.getRawPath(); URI aifBase = aifURI.resolve(path.substring(0, path.lastIndexOf('/') + 1)); URI signalURI = new URI(signalHREF); URI relativeURI = aifBase.relativize(signalURI); if (DEBUG > 0) { System.err.println("ATHelp.extern:\n base= " + aifBase + "\n signal= " + signalURI + "\n relative= " + relativeURI); } signal.addAttribute("href", relativeURI.toString()); } catch (URISyntaxException x) { System.err.println("WARNING: aif file specifies invalid signal URI:" + " not relativizing:\n aifURI= " + aifURI + "\n signalURI=" + signalHREF); System.err.println(x.getMessage()); } } // ATLAS ignores encoding so use the cheats signal.addAttribute("encoding", (String) cheatMap.get("encoding")); signal.addAttribute("mimeType", (String) cheatMap.get("mimeType")); if (cheatMap.get(SIGNAL_DATA) != null) { String embedded = Base64.encode((byte[]) cheatMap.get(SIGNAL_DATA)); Element body = signal.addElement("body"); body.addAttribute("encoding", "Base64"); body.addText(embedded); } } // if (signal != null) dump(doc, out); } /** * Returns signal element who's type is "text". May return null. */ private static Element getTextSignal(Element corpus) { Element signal = null; // Do not assume there is only one signal Iterator signalElts = corpus.elementIterator("SimpleSignal"); while (signalElts.hasNext()) { // find the signal with type attribute 'text' Element signalElt = (Element) signalElts.next(); String signalName = signalElt.attributeValue("type"); if (signalName.equalsIgnoreCase("text")) { signal = signalElt; break; } } // TODO: if there is no signal with type 'text', we're in trouble... return signal; } public static Document parse(URI aifURI) throws IOException { SAXReader reader = new SAXReader(); // actually, this is ok, if we use the entity resolver reader.setEntityResolver(new ATLASResolver()); reader.setIncludeExternalDTDDeclarations(false); try { // URI.toURL() fails when opaque. don't expect an opaque here, but... if (DEBUG > 0) System.err.println("ATHelp.parse: aifURI=" + aifURI); return reader.read(new URL(aifURI.toString())); } catch (DocumentException x) { IOException ex = new IOException("Unable to parse input aif"); ex.initCause(x); throw ex; } } public static Document parse(InputStream in) throws IOException { SAXReader reader = new SAXReader(); // actually, this is ok, if we use the entity resolver reader.setEntityResolver(new ATLASResolver()); reader.setIncludeExternalDTDDeclarations(false); try { // URI.toURL() fails when opaque. don't expect an opaque here, but... return reader.read(in); } catch (DocumentException x) { IOException ex = new IOException("Unable to parse input aif"); ex.initCause(x); throw ex; } } public static final boolean EXTERNAL = true; public static final boolean LOCAL = false; /** Look up a task by the specified MAIA URI, checking against the Tasks * advertized MAIA URI: either external (cononical) or local, as specified */ public static Task findTask(String uri, boolean external) { if (DEBUG > 1) System.err.println("ATHelp.findTask: finding task (" + (external ? "EXT" : "LOCAL") + ")\n " + uri); // O(n)... just hope you don't have that many tasks. If it get's to be a // real issue, we'll use a hash tree Iterator iter = Jawb.getTasks().iterator(); while (iter.hasNext()) { Task task = (Task) iter.next(); URI taskURI = external ? task.getMaiaURI() : task.getLocalMaiaURI(); if (taskURI.toString().equals(uri)) return task; } return null; } public static List findTasks(String uri, boolean external) { List tasks = new LinkedList(); if (DEBUG > 1) System.err.println("ATHelp.findTasks: finding tasks (" + (external ? "EXT" : "LOCAL") + ")\n " + uri); // O(n)... just hope you don't have that many tasks. If it get's to be a // real issue, we'll use a hash tree Iterator iter = Jawb.getTasks().iterator(); while (iter.hasNext()) { Task task = (Task) iter.next(); URI taskURI = external ? task.getMaiaURI() : task.getLocalMaiaURI(); if (taskURI.toString().equals(uri)) tasks.add(task); } return tasks; } public static void dump(Document doc, OutputStream out) throws IOException { // Pretty print the document to System.out OutputFormat format = OutputFormat.createPrettyPrint(); format.setEncoding("US-ASCII"); Writer writer = new OutputStreamWriter(out, "US-ASCII"); XMLWriter xmlWriter = new XMLWriter(writer, format); xmlWriter.write(doc); xmlWriter.close(); } /***********************************************************************/ /* Initialization */ /***********************************************************************/ private static void initSAXHandler() { if (DEBUG > 0) System.err.println("ATHelp.initSAXHandler"); supportingTasks = new LinkedList(); saxHandler = new ATLASSaxHandler(); try { // Use the default (non-validating) parser SAXParserFactory factory = SAXParserFactory.newInstance(); saxParser = factory.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", saxHandler); } catch (Exception x) { throw new RuntimeException("Unable to create parser to Retrieve MAIA", x); } if (DEBUG > 0) System.err.println("ATHelp.initSAXHandler: initialized"); } public static void main(String args[]) throws Exception { //String uri = args[0]; //Writer writer = new FileWriter ("C:/cygwin/tmp/regurgitation.aif"); String inSpec = "file:/C:/cygwin/tmp/example.aif.xml"; String outSpec = "file:/C:/cygwin/tmp/regurgitated.aif.xml"; URI aifURI = new URI(inSpec); URI outURI = new URI(outSpec); OutputStream out = new FileOutputStream(new File(outURI)); Map cheatMap = new HashMap(); //localize (aifURI, out, cheatMap); externalize(aifURI, out, true, cheatMap); out.close(); List supporting = getSupportingTasks(outURI); System.err.println("Supporting Tasks: " + supporting); } /** * Read in an .aif file from the specified URI, and write it out with * localized refernces to the output stream. * @param aifURI location of input .aif file. <strong>MUST BE ABSOLUTE.</strong> * @param out stream that localized version of input is written to * @param relativize rewrite the absolute signal URI as relative based on * input * @param cheatMap A map of undocumented values that we use in Callisto * to store data in the AIF which ATLAS won't. */ public static void externalize(URI aifURI, InputStream in, OutputStream out, boolean relativize, Map cheatMap) throws IOException { // if (DEBUG > 0) // System.err.println ("ATHelp.externalize: aifURI="+aifURI); Document doc = parse(in); DocumentType doctype = doc.getDocType(); Element corpus = doc.getRootElement(); Element signal = getTextSignal(corpus); // Replace local ATLAS DTD reference w/ external reference doctype.setSystemID("http://www.nist.gov/speech/atlas/aif.dtd"); // Replace local MAIA Scheme w/ external reference String maiaString = corpus.attributeValue("schemeLocation"); Task task = findTask(maiaString, LOCAL); if (task == null) System.err.println("Unable to extern Maia: Unknown:\n " + maiaString); else corpus.addAttribute("schemeLocation", task.getMaiaURI().toString()); // It's possible to /not/ have a text signal referenced if (signal != null) { // Perhaps replace absolute URI with relative URI if (relativize) { String signalHREF = signal.attributeValue("href"); try { String path = aifURI.getRawPath(); URI aifBase = aifURI.resolve(path.substring(0, path.lastIndexOf('/') + 1)); URI signalURI = new URI(signalHREF); URI relativeURI = aifBase.relativize(signalURI); if (DEBUG > 0) { System.err.println("ATHelp.extern:\n base= " + aifBase + "\n signal= " + signalURI + "\n relative= " + relativeURI); } signal.addAttribute("href", relativeURI.toString()); } catch (URISyntaxException x) { System.err.println("WARNING: aif file specifies invalid signal URI:" + " not relativizing:\n aifURI= " + aifURI + "\n signalURI=" + signalHREF); System.err.println(x.getMessage()); } } // ATLAS ignores encoding so use the cheats signal.addAttribute("encoding", (String) cheatMap.get("encoding")); signal.addAttribute("mimeType", (String) cheatMap.get("mimeType")); if (cheatMap.get(SIGNAL_DATA) != null) { String embedded = Base64.encode((byte[]) cheatMap.get(SIGNAL_DATA)); Element body = signal.addElement("body"); body.addAttribute("encoding", "Base64"); body.addText(embedded); } } // if (signal != null) dump(doc, out); } }