marytts.datatypes.MaryData.java Source code

Java tutorial

Introduction

Here is the source code for marytts.datatypes.MaryData.java

Source

/**
 * Copyright 2000-2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.datatypes;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;

import marytts.modules.synthesis.Voice;
import marytts.server.MaryProperties;
import marytts.server.http.MaryHttpServerUtils;
import marytts.util.MaryUtils;
import marytts.util.data.audio.AppendableSequenceAudioInputStream;
import marytts.util.data.audio.SequenceAudioInputStream;
import marytts.util.data.text.UncloseableBufferedReader;
import marytts.util.dom.DomUtils;
import marytts.util.dom.MaryNormalisedWriter;
import marytts.util.io.FileUtils;
import marytts.util.io.LoggingErrorHandler;
import marytts.util.io.ReaderSplitter;
import marytts.util.string.StringUtils;

import org.apache.http.HttpResponse;
import org.apache.http.nio.entity.NByteArrayEntity;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import com.sun.speech.freetts.Utterance;

/**
 * A representation of any type of mary data, be it input, intermediate or
 * output data. The "technical" representation of the read data is hidden from
 * the caller, but can be accessed on request.
 * Internally, the data is appropriately represented according to this data's
 * type, i.e. as a String containing plain text, an XML DOM tree, an
 * input stream containing audio data, or a list of utterances.
 * @author Marc Schr&ouml;der
 */
public class MaryData {
    private final MaryDataType type;
    private final Locale locale;
    private String outputParams = null;
    // Only one of the following data representations should be non-null
    // for a given instance; which one depends on our type.
    private Document xmlDocument = null;
    private String plainText = null;
    private AudioInputStream audio = null;
    private AudioFileFormat audioFileFormat = null;
    private List<Utterance> utterances = null;
    private final Logger logger = MaryUtils.getLogger("IO");

    // for plainText, allow additional information:
    private Voice defaultVoice = null;
    private String defaultStyle = "";
    private String defaultEffects = "";

    // The following XML I/O helpers are only initialised
    // if actually needed.
    private MaryNormalisedWriter writer = null;

    private boolean doValidate;
    // Never modified: setWarnClient() is a no-op, so this stays false.
    private boolean doWarnClient = false;

    /**
     * Create an empty data object of the given type, without a stub document.
     * @param type the data type; must not be null.
     * @param locale the locale of the data; stored as given (may be null).
     * @throws NullPointerException if <code>type</code> is null.
     */
    public MaryData(MaryDataType type, Locale locale) {
        this(type, locale, false);
    }

    /**
     * Create an empty data object of the given type.
     * @param type the data type; must not be null.
     * @param locale the locale of the data; stored as given (may be null).
     * @param createStubDocument if true and <code>type</code> is a MaryXML type,
     * the XML document is initialised with <code>MaryXML.newDocument()</code>.
     * @throws NullPointerException if <code>type</code> is null.
     */
    public MaryData(MaryDataType type, Locale locale, boolean createStubDocument) {
        if (type == null) {
            throw new NullPointerException("Received null type for MaryData");
        }
        this.type = type;
        this.locale = locale;
        // The following is the default setting for module output (we suppose
        // that for the input data, setValidating() is called as appropriate):
        doValidate = MaryProperties.getBoolean("maryxml.validate.modules", false);
        if (createStubDocument && type.isMaryXML()) {
            xmlDocument = MaryXML.newDocument();
        }
    }

    /**
     * @return whether XML data set via {@link #setData(String)} is parsed in validating mode.
     */
    public boolean getValidating() {
        return doValidate;
    }

    /**
     * Choose whether XML data set via {@link #setData(String)} is parsed in validating mode.
     * @param doValidate true to request validating parsing.
     * @throws ParserConfigurationException declared in the signature; this implementation
     * only stores the flag and does not throw it.
     */
    public void setValidating(boolean doValidate) throws ParserConfigurationException {
        this.doValidate = doValidate;
    }

    /**
     * @return the warn-client flag. As {@link #setWarnClient(boolean)} does nothing,
     * this is always false.
     */
    @Deprecated
    public boolean getWarnClient() {
        return doWarnClient;
    }

    /**
     * Does nothing; the argument is ignored.
     * @param doWarnClient ignored.
     */
    @Deprecated
    public void setWarnClient(boolean doWarnClient) {
    }

    /**
     * @return the data type given at construction time; never null.
     */
    public MaryDataType getType() {
        return type;
    }

    /**
     * @return the locale given at construction time (may be null).
     */
    public Locale getLocale() {
        return locale;
    }

    /**
     * Read data from input stream <code>is</code>,
     * in the appropriate way as determined by our <code>type</code>.
     * Equivalent to <code>readFrom(is, null)</code>, i.e. reads until end-of-file.
     * @param is the InputStream from which to read.
     */
    public void readFrom(InputStream is) throws ParserConfigurationException, SAXException, IOException,
            TransformerConfigurationException, TransformerException {
        readFrom(is, null);
    }

    /**
     * Read data from input stream <code>is</code>,
     * in the appropriate way as determined by our <code>type</code>.
     * XML and text data are decoded as UTF-8. For audio data, an
     * {@link AudioInputStream} is used directly; any other stream is decoded
     * via {@link AudioSystem#getAudioInputStream(InputStream)}.
     * @param is the InputStream from which to read.
     * @param endMarker a string marking end of file. If this is null, read until
     * end-of-file; if it is non-null, read up to (and including) the first line containing
     * the end marker string. This will be ignored for audio data.
     * @throws IOException if this is an utterance-based data type, if reading fails,
     * or if an audio stream is in a format that cannot be decoded.
     */
    public void readFrom(InputStream is, String endMarker) throws ParserConfigurationException, SAXException,
            IOException, TransformerConfigurationException, TransformerException {
        if (type.isUtterances()) {
            throw new IOException("Cannot read into utterance-based data type!");
        }

        if (type.isXMLType() || type.isTextType()) {
            readFrom(new InputStreamReader(is, "UTF-8"), endMarker);
            return;
        }

        // Audio data: endMarker is ignored.
        if (is == null || is instanceof AudioInputStream) {
            // Same as before: a null or ready-made audio stream is stored as-is.
            setAudio((AudioInputStream) is);
            return;
        }
        // A plain stream cannot simply be cast; let the audio system parse it.
        // The parsers need mark/reset support, so add buffering if it is missing.
        InputStream markable = is.markSupported() ? is : new BufferedInputStream(is);
        try {
            setAudio(AudioSystem.getAudioInputStream(markable));
        } catch (UnsupportedAudioFileException e) {
            IOException ioe = new IOException("Cannot read audio data from input stream: " + e.getMessage());
            ioe.initCause(e);
            throw ioe;
        }
    }

    /**
     * Read data from reader <code>from</code>
     * in the appropriate way as determined by our <code>type</code>.
     * Only XML and Text data can be read from a reader, audio data cannot.
     * The reader is read into a String which is then passed to {@link #setData(String)}.
     * @param from the Reader from which to read.
     */
    public void readFrom(Reader from) throws ParserConfigurationException, SAXException, IOException {
        String inputData = FileUtils.getReaderAsString(from);
        setData(inputData);
    }

    /**
     * Read data from reader <code>from</code>
     * in the appropriate way as determined by our <code>type</code>.
     * Only XML and Text data can be read from a reader, audio data cannot.
     * @param from the Reader from which to read.
     * @param endMarker a string marking end of file. If this is null, read until
     * end-of-file; if it is non-null, read up to (and including) the first line containing
     * the end marker string.
     */
    public void readFrom(Reader from, String endMarker)
            throws ParserConfigurationException, SAXException, IOException {
        // For the case that the data to be read is not
        // followed by end-of-file, we use a ReaderSplitter which
        // provides a reader artificially "inserting" an end-of-file
        // after a line containing the pattern given in endMarker.
        Reader r = from;
        if (endMarker != null) {
            ReaderSplitter fromSplitter = new ReaderSplitter(from, endMarker);
            r = fromSplitter.nextReader();
        }
        readFrom(r);
    }

    /**
     * Set the content data of this MaryData object from the given String.
     * For XML data ({@link MaryDataType#isXMLType()}), parse the String representation of the data into a DOM tree.
     * @param dataString string representation of the input data.
     * @throws IOException
     * @throws SAXException
     * @throws IllegalArgumentException if this method is called for MaryDataTypes that are neither text nor XML.
     */
    public void setData(String dataString) throws ParserConfigurationException, SAXException, IOException {
        // First, some data cleanup:
        dataString = StringUtils.purgeNonBreakingSpaces(dataString);
        // Now, deal with it. The debug messages embed the complete input, so only
        // build them when they will actually be logged.
        if (type.isXMLType()) {
            if (logger.isDebugEnabled()) {
                logger.debug("Parsing XML input (" + (doValidate ? "" : "non-") + "validating): " + dataString);
            }
            xmlDocument = DomUtils.parseDocument(dataString, doValidate);
        } else if (type.isTextType()) {
            if (logger.isDebugEnabled()) {
                logger.debug("Setting text input: " + dataString);
            }
            plainText = dataString;
        } else {
            throw new IllegalArgumentException("Cannot set data of type " + type + " from a string");
        }
    }

    /**
     * Write our internal representation to output stream <code>os</code>,
     * in the appropriate way as determined by our <code>type</code>.
     * XML data is written through a {@link MaryNormalisedWriter}, text data as UTF-8.
     * Note that for audio data the stream is flushed <em>and closed</em> after writing,
     * whereas for XML and text data it is only flushed.
     * @param os the stream to write to.
     * @throws IOException if this is an utterance-based data type or writing fails.
     */
    public void writeTo(OutputStream os) throws TransformerConfigurationException, FileNotFoundException,
            TransformerException, IOException, Exception {
        if (type.isUtterances()) {
            throw new IOException("Cannot write out utterance-based data type!");
        }

        if (type.isXMLType()) {
            if (writer == null) {
                writer = new MaryNormalisedWriter();
            }
            // isDebugEnabled() also covers levels more verbose than DEBUG.
            if (logger.isDebugEnabled()) {
                ByteArrayOutputStream debugOut = new ByteArrayOutputStream();
                writer.output(xmlDocument, debugOut);
                logger.debug(debugOut.toString());
            }
            BufferedOutputStream buffered = new BufferedOutputStream(os);
            writer.output(xmlDocument, buffered);
            // Make sure nothing is left behind in our own buffer.
            buffered.flush();
        } else if (type.isTextType()) { // caution: XML types are text types!
            writeTo(new OutputStreamWriter(os, "UTF-8"));
        } else { // audio
            logger.debug("Writing audio output, frame length " + audio.getFrameLength());
            AudioSystem.write(audio, audioFileFormat.getType(), os);
            os.flush();
            os.close();
        }
    }

    /**
     * Write our internal representation to writer <code>w</code>,
     * in the appropriate way as determined by our <code>type</code>.
     * Only Text data can be written to a writer; XML data must be written to an
     * OutputStream, and audio data cannot be written to a character writer.
     * The writer is flushed but not closed.
     * @param w the writer to write to.
     * @throws IOException if this is an utterance-based or XML data type, or writing fails.
     * @throws Exception if this is an audio data type.
     */
    public void writeTo(Writer w) throws TransformerConfigurationException, FileNotFoundException,
            TransformerException, IOException, Exception {
        if (type.isUtterances()) {
            throw new IOException("Cannot write out utterance-based data type!");
        }
        if (type.isXMLType()) {
            throw new IOException("Better write XML data to an OutputStream, not to a Writer");
        }
        if (!type.isTextType()) { // audio - cannot write this to a writer
            throw new Exception("Illegal attempt to write audio data to a character Writer");
        }
        w.write(plainText);
        w.flush();
        logger.debug("Writing Text output:\n" + plainText);
    }

    /**
     * Get the data in the representation matching our <code>type</code>.
     * @return the XML document for XML types, the plain text for other text types,
     * the utterance list for utterance types, and the audio stream otherwise.
     * The value is null if the corresponding representation has not been set.
     */
    public Object getData() {
        if (type.isXMLType()) {
            return xmlDocument;
        }
        if (type.isTextType()) {
            return plainText;
        }
        if (type.isUtterances()) {
            return utterances;
        }
        // audio
        return audio;
    }

    /**
     * @return the plain text representation, or null if none is set.
     */
    public String getPlainText() {
        return plainText;
    }

    /**
     * @param plainText the plain text representation to store.
     */
    public void setPlainText(String plainText) {
        this.plainText = plainText;
    }

    /**
     * @return the XML document representation, or null if none is set.
     */
    public Document getDocument() {
        return xmlDocument;
    }

    /**
     * @param xmlDocument the XML document representation to store.
     */
    public void setDocument(Document xmlDocument) {
        this.xmlDocument = xmlDocument;
    }

    /**
     * @return the audio representation, or null if none is set.
     */
    public AudioInputStream getAudio() {
        return audio;
    }

    /**
     * Set the audio data. This will discard any previously set audio data. If audio data is to be
     * appended, consider appendAudio().
     * @param audio the audio stream to store.
     */
    public void setAudio(AudioInputStream audio) {
        this.audio = audio;
    }

    /**
     * @return the utterance list representation, or null if none is set.
     */
    public List<Utterance> getUtterances() {
        return utterances;
    }

    /**
     * @param utterances the utterance list representation to store.
     */
    public void setUtterances(List<Utterance> utterances) {
        this.utterances = utterances;
    }

    /**
     * Set the default voice. A null voice is ignored, i.e. a previously set
     * default voice is kept. If the voice's locale and the document locale are
     * both known and neither subsumes the other, a warning is logged; the voice
     * is stored as the default voice regardless.
     * @param voice the voice to use as default; null leaves the current setting unchanged.
     */
    public void setDefaultVoice(Voice voice) {
        if (voice == null) {
            return;
        }
        Locale voiceLocale = voice.getLocale();
        // Determine the document locale: the explicit one, or else the
        // xml:lang attribute on the root element of an XML document.
        Locale docLocale = getLocale();
        if (docLocale == null && getType().isXMLType() && getDocument() != null) {
            Element root = getDocument().getDocumentElement();
            if (root != null && root.hasAttribute("xml:lang")) {
                docLocale = MaryUtils.string2locale(root.getAttribute("xml:lang"));
            }
        }
        if (docLocale != null && voiceLocale != null
                && !(MaryUtils.subsumes(docLocale, voiceLocale) || MaryUtils.subsumes(voiceLocale, docLocale))) {
            // The voice has always been accepted despite a mismatch; say so
            // instead of claiming that it is being ignored.
            logger.warn("Voice `" + voice.getName() + "' does not match document locale `" + docLocale
                    + "' -- using it anyway!");
        }
        this.defaultVoice = voice;
    }

    /**
     * @return the default voice, or null if none is set.
     */
    public Voice getDefaultVoice() {
        return defaultVoice;
    }

    /**
     * @param style the default style to store.
     */
    public void setDefaultStyle(String style) {
        defaultStyle = style;
    }

    /**
     * @return the default style; the empty string unless set otherwise.
     */
    public String getDefaultStyle() {
        return defaultStyle;
    }

    /**
     * @param effects the default effects to store.
     */
    public void setDefaultEffects(String effects) {
        defaultEffects = effects;
    }

    /**
     * @return the default effects; the empty string unless set otherwise.
     */
    public String getDefaultEffects() {
        return defaultEffects;
    }

    /**
     * The audio file format is required only for data types serving as input
     * to modules producing AUDIO data (e.g., MBROLA data), as well as for the
     * AUDIO data itself. It should be set by the calling code before passing
     * the data to the module producing AUDIO data.
     * @param audioFileFormat the audio file format to store.
     */
    public void setAudioFileFormat(AudioFileFormat audioFileFormat) {
        this.audioFileFormat = audioFileFormat;
    }

    /**
     * @return the audio file format, or null if none is set.
     */
    public AudioFileFormat getAudioFileFormat() {
        return audioFileFormat;
    }

    /**
     * Append the content of another data object of the same type to this one.
     * For XML data, deep copies of the children of <code>md</code>'s root element
     * are appended to this document's root element. For text data, the texts are
     * joined with an empty line in between; a missing text in <code>md</code>
     * leaves existing text untouched. For AUDIO data, see {@link #appendAudio(AudioInputStream)}.
     * @param md the data to append; must not be null and must have the same type as this.
     * @throws NullPointerException if <code>md</code> is null.
     * @throws IllegalArgumentException if the types differ.
     * @throws UnsupportedOperationException if data of this type cannot be appended.
     */
    public void append(MaryData md) {
        if (md == null) {
            throw new NullPointerException("Received null marydata");
        }
        if (!md.getType().equals(this.getType())) {
            throw new IllegalArgumentException("Cannot append mary data of type `" + md.getType().name()
                    + "' to mary data of type `" + this.getType().name() + "'");
        }
        if (getType().isXMLType()) {
            NodeList kids = md.getDocument().getDocumentElement().getChildNodes();
            // The node list is live. Fix the number of nodes up front so that
            // appending data that shares our document cannot loop forever.
            int numKids = kids.getLength();
            logger.debug("Appending " + numKids + " nodes to MaryXML structure");
            Document target = this.getDocument();
            Element docEl = target.getDocumentElement();
            for (int i = 0; i < numKids; i++) {
                docEl.appendChild(target.importNode(kids.item(i), true));
            }
        } else if (getType().isTextType()) {
            // Attention: XML type is a text type!
            String extraText = md.getPlainText();
            if (this.plainText == null) {
                this.plainText = extraText;
            } else if (extraText != null) {
                // Without the null check, the literal "null" would be appended.
                this.plainText = this.plainText + "\n\n" + extraText;
            }
        } else if (getType().equals(MaryDataType.get("AUDIO"))) {
            appendAudio(md.getAudio());
        } else {
            throw new UnsupportedOperationException(
                    "Cannot append two mary data items of type `" + getType() + "'");
        }
    }

    /**
     * For audio data, append more audio data to the one currently present. If no audio data is set yet,
     * this call is equivalent to setAudio(). If the current audio is an
     * {@link AppendableSequenceAudioInputStream}, the new audio is appended to it; otherwise the current
     * and the new audio are combined into a new {@link SequenceAudioInputStream} using the format of
     * the current audio.
     * @param audioToAppend the new audio data to append
     */
    public void appendAudio(AudioInputStream audioToAppend) {
        if (this.audio == null) {
            setAudio(audioToAppend);
        } else if (this.audio instanceof AppendableSequenceAudioInputStream) {
            ((AppendableSequenceAudioInputStream) this.audio).append(audioToAppend);
        } else {
            this.audio = new SequenceAudioInputStream(this.audio.getFormat(),
                    Arrays.asList(new AudioInputStream[] { this.audio, audioToAppend }));
        }
    }

    /**
     * @param params the output parameters to store.
     */
    public void setOutputParams(String params) {
        this.outputParams = params;
    }

    /**
     * @return the output parameters, or null if none are set.
     */
    public String getOutputParams() {
        return outputParams;
    }
}