Example usage for org.dom4j Element attributeValue

Introduction

On this page you can find example usages of org.dom4j Element attributeValue.

Prototype

String attributeValue(QName qName);

Source Link

Document

This returns the value of the attribute with the given fully qualified name, or null if there is no such attribute, or the empty string if the attribute value is empty.

Usage

From source file:com.globalsight.everest.tm.util.trados.TradosFmSgmlTmxToGxml.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Reads the TMX file at {@code p_url} with a validating, Xerces-backed
 * {@code SAXReader} and processes it through element handlers while it is
 * being parsed: the {@code version} attribute of {@code /tmx} is stored in
 * {@code m_version}, the header is handed to {@code setOldHeader} and
 * {@code createNewHeader}, the content of every {@code seg} is replaced by
 * the parsed result of {@code handleTuv}, and every finished {@code tu} is
 * written with {@code writeEntry} unless one of its segments failed.
 *
 * @param p_url location of the TMX file to read
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception propagated from the helper calls or from reading the
 *         input; the output file is closed before a read failure is rethrown
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // NOTE(review): this value is never used below; the call is kept because
    // getExtension is not visible from here.
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");
    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Capture the TMX version as soon as the root element is opened.
    reader.addHandler("/tmx", new ElementHandler() {
        final public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        final public void onEnd(ElementPath path) {
        }
    });

    // Convert the header once it is complete, then drop it from the tree.
    reader.addHandler("/tmx/header", new ElementHandler() {
        final public void onStart(ElementPath path) {
        }

        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Count every TU, write it out when complete, then drop it from the tree.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        final public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Replace the content of each segment with its converted form.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        final public void onStart(ElementPath path) {
        }

        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content: the list is reversed and then walked
                // from its end, so nodes are moved over in their original
                // order while always being taken from the tail.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: the enclosing TU is skipped and counted as an
                // error by the TU handler; record why instead of hiding it.
                m_tuError = true;
                debug("Entry " + m_entryCount + ": segment conversion failed: " + ex);
            }
        }
    });

    try {
        reader.read(p_url);
    } catch (Exception ex) {
        // Do not leave the output file open when parsing fails, and do not
        // let a secondary close failure hide the original problem.
        try {
            closeOutputFile();
        } catch (Exception closeEx) {
            debug("Could not close output file after failed read: " + closeEx);
        }
        throw ex;
    }

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosFmTmxToGxml.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Reads the TMX file at {@code p_url} with a validating, Xerces-backed
 * {@code SAXReader} and processes it through element handlers while it is
 * being parsed: the {@code version} attribute of {@code /tmx} is stored in
 * {@code m_version}, the header is handed to {@code setOldHeader} and
 * {@code createNewHeader}, the content of every {@code seg} is replaced by
 * the parsed result of {@code handleTuv}, and every finished {@code tu} is
 * written with {@code writeEntry} unless one of its segments failed.
 *
 * @param p_url location of the TMX file to read
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception propagated from the helper calls or from reading the
 *         input; the output file is closed before a read failure is rethrown
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // NOTE(review): this value is never used below; the call is kept because
    // getExtension is not visible from here.
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");
    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Capture the TMX version as soon as the root element is opened.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Convert the header once it is complete, then drop it from the tree.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Count every TU, write it out when complete, then drop it from the tree.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Replace the content of each segment with its converted form.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content: the list is reversed and then walked
                // from its end, so nodes are moved over in their original
                // order while always being taken from the tail.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: the enclosing TU is skipped and counted as an
                // error by the TU handler; record why instead of hiding it.
                m_tuError = true;
                debug("Entry " + m_entryCount + ": segment conversion failed: " + ex);
            }
        }
    });

    try {
        reader.read(p_url);
    } catch (Exception ex) {
        // Do not leave the output file open when parsing fails, and do not
        // let a secondary close failure hide the original problem.
        try {
            closeOutputFile();
        } catch (Exception closeEx) {
            debug("Could not close output file after failed read: " + closeEx);
        }
        throw ex;
    }

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosHtmlTmxToGxml.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Reads the TMX file at {@code p_url} with a validating, Xerces-backed
 * {@code SAXReader} and processes it through element handlers while it is
 * being parsed: the {@code version} attribute of {@code /tmx} is stored in
 * {@code m_version}, the header is handed to {@code setOldHeader} and
 * {@code createNewHeader}, every {@code seg} is passed through
 * {@code removeUtElements} and has its content replaced by the parsed result
 * of {@code handleTuv} applied to its text, and every finished {@code tu} is
 * written with {@code writeEntry} unless one of its segments failed.
 *
 * @param p_url location of the TMX file to read
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception propagated from the helper calls or from reading the
 *         input; the output file is closed before a read failure is rethrown
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // NOTE(review): this value is never used below; the call is kept because
    // getExtension is not visible from here.
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");

    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Capture the TMX version as soon as the root element is opened.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Convert the header once it is complete, then drop it from the tree.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Count every TU, write it out when complete, then drop it from the tree.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // Replace the content of each segment with its converted form.
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                element = removeUtElements(element);

                String gxml = handleTuv(element.getText());
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content: the list is reversed and then walked
                // from its end, so nodes are moved over in their original
                // order while always being taken from the tail.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: the enclosing TU is skipped and counted as an
                // error by the TU handler; record why instead of hiding it.
                m_tuError = true;
                debug("Entry " + m_entryCount + ": segment conversion failed: " + ex);
            }
        }
    });

    try {
        reader.read(p_url);
    } catch (Exception ex) {
        // Do not leave the output file open when parsing fails, and do not
        // let a secondary close failure hide the original problem.
        try {
            closeOutputFile();
        } catch (Exception closeEx) {
            debug("Could not close output file after failed read: " + closeEx);
        }
        throw ex;
    }

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', " + m_errorCount + " errors.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.trados.TradosTmxToRtf.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Reads the TMX file at {@code p_url} with a validating, Xerces-backed
 * {@code SAXReader}. While parsing, the {@code version} attribute of
 * {@code /tmx} is stored in {@code m_version}; the header is handed to
 * {@code setOldHeader}, its {@code RTFFontTable} and {@code RTFStyleSheet}
 * props (when found) are written out, followed by
 * {@code writeOtherRtfHeader} and {@code writeDummyParagraph}; every
 * finished {@code tu} is cleaned with {@code removeUtElements}, written via
 * {@code writeEntry}, and followed by a {@code \par} entry.
 *
 * @param p_url location of the TMX file to read
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception propagated from the helper calls or from reading the
 *         input; the output file is closed before a read failure is rethrown
 */
public String convertToRtf(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // NOTE(review): this value is never used below; the call is kept because
    // getExtension is not visible from here.
    final String extension = getExtension(p_url);

    info("Converting TMX file to RTF: `" + p_url + "'");

    startOutputFile(baseName);

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Capture the TMX version as soon as the root element is opened.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            Element element = path.getCurrent();

            m_version = element.attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // Emit the RTF preamble from the header, then drop it from the tree.
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            setOldHeader(element);

            // NOTE(review): the leading '/' makes these absolute XPath
            // expressions; confirm they really select <prop> children of the
            // header rather than never matching.
            Element prop = (Element) element.selectSingleNode("/prop[@type='RTFFontTable']");

            if (prop != null) {
                writeEntry(prop.getText());
            }

            prop = (Element) element.selectSingleNode("/prop[@type='RTFStyleSheet']");

            if (prop != null) {
                writeEntry(prop.getText());
            }

            writeOtherRtfHeader();

            writeDummyParagraph();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Count every TU, write it out when complete, then drop it from the tree.
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            element = removeUtElements(element);

            writeEntry(replaceUnicodeChars(removeRtfParagraphs(element.asXML())));
            writeEntry("\\par");

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    try {
        reader.read(p_url);
    } catch (Exception ex) {
        // Do not leave the output file open when parsing fails, and do not
        // let a secondary close failure hide the original problem.
        try {
            closeOutputFile();
        } catch (Exception closeEx) {
            debug("Could not close output file after failed read: " + closeEx);
        }
        throw ex;
    }

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.ttx.TtxClean.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Loads the whole TTX document at {@code p_url}, lets
 * {@code processBody} rework the {@code //Body/Raw} element, and writes the
 * inner text of that element to a new output file.
 *
 * @param p_url location of the TTX file to read
 * @param p_cleanTarget stored in {@code m_cleanTarget}; when true the target
 *        language and {@code p_encoding} are used for the output, otherwise
 *        the source language and the header's original encoding
 * @param p_encoding output encoding when cleaning to target; {@code null}
 *        selects "UTF-8"
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception propagated from the helper calls or from reading the input
 */
public String cleanTtx(String p_url, boolean p_cleanTarget, String p_encoding) throws Exception {
    m_cleanTarget = p_cleanTarget;

    // File is called <file>.<ext>.<ttx>
    final String nameWithoutTtx = getBaseName(p_url);
    final String baseName = getBaseName(nameWithoutTtx);
    final String extension = getExtension(nameWithoutTtx);

    info("Cleaning TTX file to " + (m_cleanTarget ? "target" : "source") + ": `" + p_url + "'");

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    // No entity resolver is installed and validation is not switched on.
    final SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    // Fetch the version info early.
    final ElementHandler versionHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue(Ttx.VERSION);
        }

        public void onEnd(ElementPath path) {
        }
    };

    // Fetch the header info early.
    final ElementHandler frontMatterHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            setOldHeader(path.getCurrent());
        }
    };

    reader.addHandler("/TRADOStag", versionHandler);
    reader.addHandler("/TRADOStag/FrontMatter", frontMatterHandler);

    // Read in the entire file (it's not too big normally).
    final Document document = reader.read(p_url);
    final Element root = document.getRootElement();

    // NOTE(review): the lookup result is not null-checked before it is
    // handed on; confirm processBody/getInnerText cope with a missing Raw.
    final Element body = (Element) root.selectSingleNode("//Body/Raw");

    // Remove <ut>, <df> and pull out one TUV.
    processBody(body);

    final String content = getInnerText(body);

    final String encoding;
    final String locale;

    if (m_cleanTarget) {
        encoding = (p_encoding != null) ? p_encoding : "UTF-8";
        locale = m_header.getTargetLanguage();
    } else {
        // reuse original encoding
        encoding = m_header.getOriginalEncoding();
        locale = m_header.getSourceLanguage();
    }

    startOutputFile(baseName, locale, extension, encoding);
    writeEntry(content);
    closeOutputFile();

    info("Result written to file `" + m_filename + "'.");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.ttx.TtxToTmx.java

License:Apache License

/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Parses the TTX file at {@code p_url} with element handlers: the version
 * attribute of {@code /TRADOStag} is stored in {@code m_version}, the
 * {@code FrontMatter} element is passed to {@code setTtxHeader} and triggers
 * {@code startOutputFile}, and every {@code Tu} found under one of the known
 * paths is run through {@code cleanupTu} and written with {@code writeEntry}.
 *
 * @param p_url location of the TTX file to read
 * @return the value of {@code m_filename} after the output file was closed
 * @throws Exception propagated from the helper calls or from reading the input
 */
public String convertTtxToTmx(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(baseName);

    info("Converting TTX file to TMX: `" + p_url + "'");

    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    // No entity resolver is installed and validation is not switched on.
    final SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    // Fetch the version info early.
    final ElementHandler versionHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue(Ttx.VERSION);
        }

        public void onEnd(ElementPath path) {
        }
    };

    // Fetch the header info early.
    final ElementHandler frontMatterHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            final Element frontMatter = path.getCurrent();

            setTtxHeader(frontMatter);

            try {
                startOutputFile(baseName);
            } catch (Exception ex) {
                error(ex.toString());
                // NOTE(review): this terminates the whole JVM from inside a
                // parser callback; confirm that is acceptable for all callers.
                System.exit(1);
            }

            // prune the current element to reduce memory
            frontMatter.detach();
        }
    };

    // Writes one cleaned-up TU per completed <Tu> element.
    final ElementHandler tuWriter = new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            final Element cleaned = cleanupTu(path.getCurrent());

            writeEntry(cleaned.asXML());

            // prune the current element to reduce memory
            cleaned.detach();

            if (m_entryCount % 50 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    };

    reader.addHandler("/TRADOStag", versionHandler);
    reader.addHandler("/TRADOStag/FrontMatter", frontMatterHandler);

    // Path handlers cannot use "//", sooo specify all known paths.
    final String[] tuPaths = {
        "/TRADOStag/Body/Raw/Tu",
        "/TRADOStag/Body/Raw/df/Tu",
        "/TRADOStag/Body/Raw/ut/Tu",
        "/TRADOStag/Body/Raw/df/ut/Tu"
    };
    for (int k = 0; k < tuPaths.length; k++) {
        reader.addHandler(tuPaths[k], tuWriter);
    }

    // Read in the entire file (it's not too big normally).
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'");

    return m_filename;
}

From source file:com.globalsight.everest.tm.util.Ttx.java

License:Apache License

/**
 * Fills the header fields from the {@code ToolSettings} and
 * {@code UserSettings} elements located via XPath from {@code p_element}.
 *
 * <p>The creation date is parsed with {@code UTC.parseNoSeparators} first and
 * with {@code UTC.parse} if that yields {@code null}; it stays {@code null}
 * when the attribute is absent.
 *
 * NOTE(review): neither lookup result is null-checked, so a document without
 * one of these elements would presumably fail here with a
 * NullPointerException - confirm whether callers rely on that.
 *
 * @param p_element element used as the context for the XPath lookups
 */
private void init(Element p_element) {
    final Element toolSettings = (Element) p_element.selectSingleNode("//ToolSettings");

    final Attribute created = toolSettings.attribute(CREATIONDATE);
    Date parsed = null;
    if (created != null) {
        final String raw = created.getValue();
        parsed = UTC.parseNoSeparators(raw);
        if (parsed == null) {
            // Fall back to the second supported date notation.
            parsed = UTC.parse(raw);
        }
    }
    m_creationdate = parsed;

    m_creationtool = toolSettings.attributeValue(CREATIONTOOL);
    m_creationtoolversion = toolSettings.attributeValue(CREATIONTOOLVERSION);

    final Element userSettings = (Element) p_element.selectSingleNode("//UserSettings");

    m_datatype = userSettings.attributeValue(DATATYPE);
    m_o_encoding = userSettings.attributeValue(O_ENCODING);
    m_settingsname = userSettings.attributeValue(SETTINGSNAME);
    m_settingspath = userSettings.attributeValue(SETTINGSPATH);
    m_sourcelanguage = userSettings.attributeValue(SOURCELANGUAGE);
    m_targetlanguage = userSettings.attributeValue(TARGETLANGUAGE);
    m_targetdefaultfont = userSettings.attributeValue(TARGETDEFAULTFONT);
    m_sourcedocumentpath = userSettings.attributeValue(SOURCEDOCUMENTPATH);
}

From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java

License:Apache License

/**
 * Assigns word counts to all sub-flows in the segment.
 *
 * <p>Every element collected by {@code findSubElements} receives a word-count
 * attribute. Skipped elements get "0"; all others add their count to
 * {@code m_totalWordCount}.
 *
 * @param p_element the segment element whose sub-flows are counted
 * @return true if the segment has at least one sub-flow
 */
private boolean countSubs(Element p_element) {
    ArrayList subFlows = new ArrayList();
    findSubElements(subFlows, p_element);

    final int total = subFlows.size();
    for (int idx = 0; idx < total; idx++) {
        Element sub = (Element) subFlows.get(idx);

        if (isSkipElement(sub)) {
            // Currently, this only affects the JavaScript embedded in the
            // HTML attribute.
            sub.addAttribute(DiplomatNames.Attribute.WORDCOUNT, "0");
            continue;
        }

        String locType = sub.attributeValue(DiplomatNames.Attribute.LOCTYPE);
        boolean translatable = locType == null || locType.equals(DiplomatNames.Element.TRANSLATABLE);

        // Localizables count as 1 token or 0, depending on
        // the configuration (Diplomat.properties).
        int words = translatable ? countWords(sub) : m_localizableCount;

        // Sub-flow word counts contribute to overall word count.
        m_totalWordCount += words;
        sub.addAttribute(DiplomatNames.Attribute.WORDCOUNT, String.valueOf(words));
    }

    return !subFlows.isEmpty();
}

From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java

License:Apache License

/**
 * Returns the string value of an element with tags representing whitespace
 * replaced by either whitespace or nbsps.
 *
 * <p>Direct text children are appended as they are, or padded with one space
 * on each side when {@code isInternalText} reports them as internal text.
 * Element children whose {@code type} attribute is a TMX (or MSO) whitespace
 * type contribute a single space. For any other element child, only the text
 * nodes that follow a nested element with {@code isTranslate="true"} are
 * appended.
 *
 * NOTE(review): when any {@code bs} value is supplied, direct text children
 * are not appended; they fall through to the final branch and are reported
 * on {@code System.err} - confirm that this is intended.
 *
 * @param p_node the element whose content is flattened
 * @param bs optional flags; only whether any were passed is inspected
 * @return the collected text
 */
static public String getTextWithWhite(Element p_node, boolean... bs) {
    StringBuilder result = new StringBuilder();

    List content = p_node.content();

    for (int i = 0, max = content.size(); i < max; i++) {
        Node node = (Node) content.get(i);
        final int nodeType = node.getNodeType();

        if (nodeType == Node.TEXT_NODE && bs.length == 0) {
            if (isInternalText(content, i)) {
                // add space around internal text
                result.append(" ").append(node.getText()).append(" ");
            } else {
                result.append(node.getText());
            }
        } else if (nodeType == Node.ELEMENT_NODE) {
            Element elem = (Element) node;
            String type = elem.attributeValue("type");

            // For word counting, always treat TMX whitespace tags
            // as white.
            if (Text.isTmxWhitespaceNode(type) || Text.isTmxMsoWhitespaceNode(type)) {
                result.append(" ");
            } else {
                // Text is extracted only while the most recent nested
                // element is flagged isTranslate="true".
                List children = elem.content();
                boolean isExtract = false;
                for (int j = 0, count = children.size(); j < count; j++) {
                    Node child = (Node) children.get(j);
                    final int childType = child.getNodeType();

                    if (childType == Node.ELEMENT_NODE) {
                        // parseBoolean(null) is false, so a missing
                        // attribute switches extraction off.
                        isExtract = Boolean.parseBoolean(((Element) child).attributeValue("isTranslate"));
                    } else if (childType == Node.TEXT_NODE && isExtract) {
                        result.append(child.getText());
                    }
                }
            }
        } else {
            System.err.println("Please fix the word counter: " + node);
        }
    }

    return result.toString();
}

From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java

License:Apache License

/**
 * Tells whether the node at index {@code i} sits directly between a
 * {@code bpt} element carrying {@code internal="yes"} and an {@code ept}
 * element (names and value compared case-insensitively).
 *
 * @param content list of sibling nodes
 * @param i index of the node to inspect
 * @return true only if both neighbours exist, are elements, and match the
 *         bpt/ept pattern described above
 */
private static boolean isInternalText(List content, int i) {
    // The first and last entries cannot have a neighbour on both sides.
    final boolean hasBothNeighbours = i != 0 && i + 1 < content.size();
    if (!hasBothNeighbours) {
        return false;
    }

    final Node before = (Node) content.get(i - 1);
    final Node after = (Node) content.get(i + 1);

    final boolean bothElements = before.getNodeType() == Node.ELEMENT_NODE
            && after.getNodeType() == Node.ELEMENT_NODE;
    if (!bothElements) {
        return false;
    }

    final Element opening = (Element) before;
    final Element closing = (Element) after;

    return "bpt".equalsIgnoreCase(opening.getName())
            && "ept".equalsIgnoreCase(closing.getName())
            && "yes".equalsIgnoreCase(opening.attributeValue("internal"));
}