com.globalsight.terminology.exporter.MtfWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.globalsight.terminology.exporter.MtfWriter.java

Source

/**
 *  Copyright 2009 Welocalize, Inc. 
 *  
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  
 *  You may obtain a copy of the License at 
 *  http://www.apache.org/licenses/LICENSE-2.0
 *  
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *  
 */

package com.globalsight.terminology.exporter;

import org.apache.log4j.Logger;

import com.globalsight.terminology.exporter.ExportUtil;

import com.globalsight.everest.company.CompanyThreadLocal;
import com.globalsight.everest.company.CompanyWrapper;
import com.globalsight.exporter.IWriter;
import com.globalsight.exporter.ExportOptions;

import com.globalsight.terminology.Entry;
import com.globalsight.terminology.TermbaseException;
import com.globalsight.terminology.TermbaseExceptionMessages;

import com.globalsight.util.FileUtil;
import com.globalsight.util.SessionInfo;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;

import java.util.*;
import java.io.*;

/**
 * Writes termbase entries to a MultiTerm iX MTF file as directed by the
 * conversion settings in the supplied export options.
 *
 * <p>Expected call order: {@link #writeHeader} once, then any number of
 * {@code write} calls, then {@link #writeTrailer} once.  The output
 * stream is opened by {@code writeHeader} and closed by
 * {@code writeTrailer}.
 */
public class MtfWriter implements IWriter, TermbaseExceptionMessages {
    private static final Logger CATEGORY = Logger.getLogger(MtfWriter.class);

    /** Flip to true to log the DOM before and after MTF conversion. */
    private static final boolean TRACE_CONVERSION = false;

    /**
     * Matches the {@code <concept>} element that is stripped from every
     * entry before conversion.  Compiled once because
     * {@link #write(Object, SessionInfo)} runs once per exported entry.
     */
    private static final java.util.regex.Pattern CONCEPT_ELEMENT = java.util.regex.Pattern
            .compile("<concept>[^<]*</concept>");

    //
    // Private Member Variables
    //
    private com.globalsight.terminology.exporter.ExportOptions m_options;
    private PrintWriter m_output;
    private String m_filename;

    //
    // Constructors
    //

    /**
     * Creates a writer configured by the given export options.
     *
     * @param p_options must be a terminology
     * {@code com.globalsight.terminology.exporter.ExportOptions} instance.
     */
    public MtfWriter(ExportOptions p_options) {
        setExportOptions(p_options);
    }

    //
    // Interface Implementation -- IWriter
    //

    /**
     * Replaces the export options used by this writer.
     *
     * @param p_options must be a terminology
     * {@code com.globalsight.terminology.exporter.ExportOptions} instance;
     * anything else fails with a ClassCastException.
     */
    public void setExportOptions(ExportOptions p_options) {
        m_options = (com.globalsight.terminology.exporter.ExportOptions) p_options;
    }

    /**
     * Returns the current export options unchanged; this writer
     * performs no analysis of its own.
     */
    public ExportOptions analyze() {
        return m_options;
    }

    /**
     * Opens the export file and writes the XML declaration and the
     * opening {@code <mtf>} tag.
     *
     * <p>For any company other than the super company the file is
     * placed in a sub-directory of the export directory named after
     * the company; that directory is created if necessary.
     *
     * @throws IOException if the configured encoding is missing, the
     * company directory cannot be created, the file cannot be opened,
     * or writing the header fails.
     */
    public void writeHeader(SessionInfo p_session) throws IOException {
        m_filename = m_options.getFileName();

        //Export termbase to a folder with company specified.
        String companyId = CompanyThreadLocal.getInstance().getValue();
        String directory = ExportUtil.getExportDirectory();
        if (!companyId.equals(CompanyWrapper.SUPER_COMPANY_ID)) {
            directory = directory + CompanyWrapper.getCompanyNameById(companyId) + "/";

            // mkdirs() also creates missing parents.  It returns false
            // when the directory already exists, hence the second test.
            File companyDir = new File(directory);
            if (!companyDir.mkdirs() && !companyDir.isDirectory()) {
                throw new IOException("cannot create export directory " + directory);
            }
        }

        String encoding = m_options.getJavaEncoding();
        String ianaEncoding = m_options.getEncoding();

        if (encoding == null || encoding.length() == 0 || ianaEncoding == null) {
            throw new IOException("invalid encoding " + ianaEncoding);
        }

        // We support only Unicode encodings for XML files: UTF-8 and
        // UTF-16 (little and big endian).  All UTF-16 variants are
        // declared as plain "UTF-16" in the XML declaration.
        if (ianaEncoding.toUpperCase(java.util.Locale.ENGLISH).startsWith("UTF-16")) {
            ianaEncoding = "UTF-16";
        }

        String filename = directory + m_filename;

        // The file is written in the Java encoding taken from the
        // export options, preceded by the byte order mark.
        FileOutputStream fos = new FileOutputStream(filename);
        boolean opened = false;
        try {
            FileUtil.writeBom(fos, ianaEncoding);
            m_output = new PrintWriter(new BufferedWriter(new OutputStreamWriter(fos, encoding)));
            opened = true;
        } finally {
            if (!opened) {
                // Do not leak the file handle when the BOM cannot be
                // written or the encoding is not supported.
                try {
                    fos.close();
                } catch (IOException ignored) {
                    // the original failure is the one worth reporting
                }
            }
        }

        m_output.print("<?xml version=\"1.0\" encoding=\"");
        m_output.print(ianaEncoding);
        m_output.println("\" ?>");
        m_output.println("<mtf>");

        checkIOError();
    }

    /**
     * Writes the closing {@code </mtf>} tag and closes the export file.
     *
     * @throws IOException if the trailer could not be written or the
     * final flush/close failed.  The file is closed in either case.
     */
    public void writeTrailer(SessionInfo p_session) throws IOException {
        m_output.println("</mtf>");

        m_output.close();

        // PrintWriter records failures (including those during close)
        // in its error flag instead of throwing; surface them here.
        checkIOError();
    }

    /**
     * Writes the next few entries to the file.
     *
     * @param p_entries ArrayList of String objects that are the XML
     * representation of the entries.
     * @throws IOException if any entry cannot be converted or written.
     */
    public void write(ArrayList p_entries, SessionInfo p_session) throws IOException {
        for (int i = 0; i < p_entries.size(); ++i) {
            write(p_entries.get(i), p_session);
        }
    }

    /**
     * Writes a single entry to the export file.
     *
     * <p>The {@code <concept>} element is stripped, system fields are
     * removed when the export options say so, and the entry is
     * converted to MTF structure before being written.
     *
     * @param p_object an XML string representing the entry.
     * @throws IOException if the entry cannot be processed (the
     * TermbaseException is attached as the cause) or writing fails.
     * @see Entry
     */
    public void write(Object p_object, SessionInfo p_session) throws IOException {
        String entryXml = (String) p_object;
        entryXml = CONCEPT_ELEMENT.matcher(entryXml).replaceAll("");

        try {
            Entry entry = new Entry(entryXml);

            Document dom = entry.getDom();

            // Do we filter out system fields?  Constant-first comparison
            // so that an unset option simply means "do not filter".
            if ("false".equalsIgnoreCase(m_options.getSystemFields())) {
                dom = ExportUtil.removeSystemFields(dom);
            }

            entry.setDom(convertToMtf(dom));

            String xml = entry.getXml();
            m_output.println(xml);

            checkIOError();
        } catch (TermbaseException ex) {
            // initCause() keeps the original stack trace available.
            IOException wrapped = new IOException("internal error: " + ex.toString());
            wrapped.initCause(ex);
            throw wrapped;
        }
    }

    /**
     * Converts a GlobalSight concept group to a MultiTerm iX concept
     * group, modifying the given document in place. Differences:
     *
     *  - concept level <descrip type="entryClass|status"> -->
     *    <system type="entryClass|status">
     *
     *  - <language name="English" locale="en_US" /> -->
     *    <language type="English" lang="EN" />
     *
     *  - <noteGrp><note> -->
     *    <descripGrp><descrip type="note"></descripGrp>
     *
     *  - <note> -->  (should not be produced but could be in old data)
     *    <descripGrp><descrip type="note"></descripGrp>
     *
     *  - <sourceGrp><source></sourceGrp> -->
     *    <descripGrp><descrip type="source"></descripGrp>
     *
     *  - descripGrp is not recursive
     *
     * @param p_elem the entry document to convert.
     * @return the same document instance, after conversion.
     */
    private Document convertToMtf(Document p_elem) {
        Element root = p_elem.getRootElement();

        if (TRACE_CONVERSION && CATEGORY.isDebugEnabled()) {
            CATEGORY.debug("gt2mtf init: " + p_elem.asXML());
        }

        // The order matters: <noteGrp> must be rewritten before loose
        // <note> elements, otherwise the notes inside groups would be
        // picked up by the "//note" search.
        rewriteConceptDescrip(root, "entryClass");
        rewriteConceptDescrip(root, "status");
        rewriteGroups(root, "noteGrp", "note");
        rewriteLooseNotes(root);
        rewriteGroups(root, "sourceGrp", "source");
        rewriteLanguages(root);

        if (TRACE_CONVERSION && CATEGORY.isDebugEnabled()) {
            CATEGORY.debug("gt2mtf done: " + p_elem.asXML());
        }

        return p_elem;
    }

    /**
     * Replaces each concept-level {@code <descrip type=p_type>} with a
     * {@code <system type=p_type>} element carrying the same text.
     */
    private void rewriteConceptDescrip(Element p_root, String p_type) {
        List nodes = p_root.selectNodes("descrip[@type='" + p_type + "']");

        for (Iterator it = nodes.iterator(); it.hasNext();) {
            Element descrip = (Element) it.next();
            Element parent = descrip.getParent();

            parent.remove(descrip);
            parent.addElement("system").addAttribute("type", p_type).addText(descrip.getText());
        }
    }

    /**
     * Replaces every {@code <p_groupName>} element with a
     * {@code <descripGrp>}: all children except {@code <p_valueName>}
     * are moved over, and the value child's text becomes a
     * {@code <descrip type=p_valueName>}.  A group without a value
     * child yields an empty descrip instead of failing.
     */
    private void rewriteGroups(Element p_root, String p_groupName, String p_valueName) {
        while (true) {
            // re-query each time, we're rewriting the structure
            Node node = p_root.selectSingleNode("//" + p_groupName);
            if (node == null) {
                break;
            }

            Element group = (Element) node;
            Element parent = group.getParent();
            parent.remove(group);

            Element descripGrp = parent.addElement("descripGrp");
            Element value = null;

            // move all child nodes but remember the value element
            // (the last one, if the group contains several)
            for (ListIterator lit = group.elements().listIterator(); lit.hasNext();) {
                Element child = (Element) lit.next();

                if (child.getName().equals(p_valueName)) {
                    value = child;
                } else {
                    lit.remove();
                    descripGrp.add(child);
                }
            }

            String text = (value == null) ? "" : value.getText();
            descripGrp.addElement("descrip").addAttribute("type", p_valueName).addText(text);
        }
    }

    /**
     * Wraps every remaining stand-alone {@code <note>} into
     * {@code <descripGrp><descrip type="note">}.
     */
    private void rewriteLooseNotes(Element p_root) {
        while (true) {
            // re-query each time, we're rewriting the structure
            Node node = p_root.selectSingleNode("//note");
            if (node == null) {
                break;
            }

            Element note = (Element) node;
            Element parent = note.getParent();
            parent.remove(note);

            Element descripGrp = parent.addElement("descripGrp");
            descripGrp.addElement("descrip").addAttribute("type", "note").addText(note.getText());
        }
    }

    /**
     * Renames the {@code name}/{@code locale} attributes of every
     * {@code <language>} to {@code type}/{@code lang}.  An attribute
     * that is absent is left alone.
     */
    private void rewriteLanguages(Element p_root) {
        List nodes = p_root.selectNodes("//languageGrp/language");

        for (Iterator it = nodes.iterator(); it.hasNext();) {
            Element language = (Element) it.next();

            Attribute nameAttr = language.attribute("name");
            if (nameAttr != null) {
                String langName = nameAttr.getValue();
                language.remove(nameAttr);
                language.addAttribute("type", langName);
            }

            Attribute localeAttr = language.attribute("locale");
            if (localeAttr != null) {
                String langLocale = localeAttr.getValue();
                language.remove(localeAttr);
                language.addAttribute("lang", toMtfLanguageCode(langLocale));
            }
        }
    }

    /**
     * Locales in MTF consist of 2 letter codes (uppercase).  Values
     * shorter than two characters are upper-cased as they are.  The
     * case conversion uses a fixed locale so that e.g. "it_IT" does not
     * turn into a dotted capital I under a Turkish default locale.
     */
    private static String toMtfLanguageCode(String p_locale) {
        String code = (p_locale.length() > 2) ? p_locale.substring(0, 2) : p_locale;
        return code.toUpperCase(java.util.Locale.ENGLISH);
    }

    /**
     * Turns the PrintWriter's error flag into an exception.
     *
     * @throws IOException if the writer has recorded a write failure.
     */
    private void checkIOError() throws IOException {
        // PrintWriter.print()/println() never throw; failures are only
        // visible through checkError().
        if (m_output.checkError()) {
            throw new IOException("write error");
        }
    }
}