List of usage examples for org.dom4j Element attributeValue
String attributeValue(QName qName);
From source file:com.globalsight.everest.tm.util.trados.TradosFmSgmlTmxToGxml.java
License:Apache License
/** * Main method to call, returns the new filename of the result. *//*from w w w .ja v a 2 s .co m*/ public String convertToGxml(String p_url) throws Exception { final String baseName = getBaseName(p_url); final String extension = getExtension(p_url); info("Converting TMX file to GXML: `" + p_url + "'"); startOutputFile(baseName); m_entryCount = 0; // Reading from a file, need to use Xerces. SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); reader.setEntityResolver(DtdResolver.getInstance()); reader.setValidation(true); // enable element complete notifications to conserve memory reader.addHandler("/tmx", new ElementHandler() { final public void onStart(ElementPath path) { Element element = path.getCurrent(); m_version = element.attributeValue("version"); } final public void onEnd(ElementPath path) { } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/header", new ElementHandler() { final public void onStart(ElementPath path) { } final public void onEnd(ElementPath path) { Element element = path.getCurrent(); setOldHeader(element); createNewHeader(); // prune the current element to reduce memory element.detach(); element = null; } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu", new ElementHandler() { final public void onStart(ElementPath path) { ++m_entryCount; m_tuError = false; } final public void onEnd(ElementPath path) { Element element = path.getCurrent(); if (m_tuError) { m_errorCount++; } else { writeEntry(element.asXML()); } // prune the current element to reduce memory element.detach(); element = null; if (m_entryCount % 1000 == 0) { debug("Entry " + m_entryCount); } } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() { final public void onStart(ElementPath path) { } final public void onEnd(ElementPath path) { Element element = 
path.getCurrent(); try { String gxml = handleTuv(element); Document doc = parse("<root>" + gxml + "</root>"); // Remove old content of seg List content = element.content(); for (int i = content.size() - 1; i >= 0; --i) { ((Node) content.get(i)).detach(); } // Add new GXML content (backwards) content = doc.getRootElement().content(); Collections.reverse(content); for (int i = content.size() - 1; i >= 0; --i) { Node node = (Node) content.get(i); element.add(node.detach()); } } catch (Throwable ex) { m_tuError = true; } } }); Document document = reader.read(p_url); closeOutputFile(); info("Processed " + m_entryCount + " TUs " + "into file `" + m_filename + "', " + m_errorCount + " errors."); return m_filename; }
From source file:com.globalsight.everest.tm.util.trados.TradosFmTmxToGxml.java
License:Apache License
/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Reads the TMX file with a validating, Xerces-backed {@code SAXReader}.
 * Element handlers record the TMX version, convert the header, rewrite
 * every {@code seg} with the parsed output of handleTuv(), and write each
 * finished {@code tu} through writeEntry(). Handled elements are detached
 * from the tree to keep memory use low.
 *
 * @param p_url location of the TMX file, handed to {@code SAXReader.read}.
 * @return the value of m_filename (presumably set by startOutputFile()).
 * @throws Exception if starting/closing the output or reading the input
 * fails.
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");

    startOutputFile(baseName);

    // NOTE(review): m_errorCount is not reset here, unlike m_entryCount -
    // confirm whether it is meant to accumulate across calls.
    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Capture the TMX version as soon as the root element starts.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                String gxml = handleTuv(element);
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then
                // walked from the last index down, so nodes are appended
                // to the segment in their original order.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: the enclosing TU is skipped and counted as
                // an error by the "/tmx/body/tu" handler. Report the cause
                // instead of dropping it silently.
                m_tuError = true;
                info("Skipping TU " + m_entryCount
                        + ", segment conversion failed: " + ex);
            }
        }
    });

    try {
        reader.read(p_url);
    } finally {
        // Also runs when reading fails, so the output started above is
        // not left open.
        closeOutputFile();
    }

    info("Processed " + m_entryCount + " TUs " + "into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}
From source file:com.globalsight.everest.tm.util.trados.TradosHtmlTmxToGxml.java
License:Apache License
/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Reads the TMX file with a validating, Xerces-backed {@code SAXReader}.
 * Element handlers record the TMX version, convert the header, and write
 * each finished {@code tu} through writeEntry(). Every {@code seg} is first
 * passed through removeUtElements(); its text is then converted by
 * handleTuv() and the parsed result replaces the segment content. Handled
 * elements are detached from the tree to keep memory use low.
 *
 * @param p_url location of the TMX file, handed to {@code SAXReader.read}.
 * @return the value of m_filename (presumably set by startOutputFile()).
 * @throws Exception if starting/closing the output or reading the input
 * fails.
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");

    startOutputFile(baseName);

    // NOTE(review): m_errorCount is not reset here, unlike m_entryCount -
    // confirm whether it is meant to accumulate across calls.
    m_entryCount = 0;

    // Reading from a file, need to use Xerces.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.setEntityResolver(DtdResolver.getInstance());
    reader.setValidation(true);

    // Capture the TMX version as soon as the root element starts.
    reader.addHandler("/tmx", new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/header", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            setOldHeader(element);
            createNewHeader();

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
            m_tuError = false;
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            if (m_tuError) {
                m_errorCount++;
            } else {
                writeEntry(element.asXML());
            }

            // prune the current element to reduce memory
            element.detach();

            if (m_entryCount % 1000 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    });

    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            try {
                element = removeUtElements(element);

                String gxml = handleTuv(element.getText());
                Document doc = parse("<root>" + gxml + "</root>");

                // Remove old content of seg
                List content = element.content();
                for (int i = content.size() - 1; i >= 0; --i) {
                    ((Node) content.get(i)).detach();
                }

                // Add new GXML content. The list is reversed and then
                // walked from the last index down, so nodes are appended
                // to the segment in their original order.
                content = doc.getRootElement().content();
                Collections.reverse(content);
                for (int i = content.size() - 1; i >= 0; --i) {
                    Node node = (Node) content.get(i);
                    element.add(node.detach());
                }
            } catch (Throwable ex) {
                // Best effort: the enclosing TU is skipped and counted as
                // an error by the "/tmx/body/tu" handler. Report the cause
                // instead of dropping it silently.
                m_tuError = true;
                info("Skipping TU " + m_entryCount
                        + ", segment conversion failed: " + ex);
            }
        }
    });

    try {
        reader.read(p_url);
    } finally {
        // Also runs when reading fails, so the output started above is
        // not left open.
        closeOutputFile();
    }

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', " + m_errorCount + " errors.");

    return m_filename;
}
From source file:com.globalsight.everest.tm.util.trados.TradosTmxToRtf.java
License:Apache License
/** * Main method to call, returns the new filename of the result. *//*from www.j a v a2s .c o m*/ public String convertToRtf(String p_url) throws Exception { final String baseName = getBaseName(p_url); final String extension = getExtension(p_url); info("Converting TMX file to RTF: `" + p_url + "'"); startOutputFile(baseName); m_entryCount = 0; // Reading from a file, need to use Xerces. SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); reader.setEntityResolver(DtdResolver.getInstance()); reader.setValidation(true); // enable element complete notifications to conserve memory reader.addHandler("/tmx", new ElementHandler() { public void onStart(ElementPath path) { Element element = path.getCurrent(); m_version = element.attributeValue("version"); } public void onEnd(ElementPath path) { } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/header", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); setOldHeader(element); Element prop = (Element) element.selectSingleNode("/prop[@type='RTFFontTable']"); if (prop != null) writeEntry(prop.getText()); prop = (Element) element.selectSingleNode("/prop[@type='RTFStyleSheet']"); if (prop != null) writeEntry(prop.getText()); writeOtherRtfHeader(); writeDummyParagraph(); // prune the current element to reduce memory element.detach(); element = null; } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); element = removeUtElements(element); writeEntry(replaceUnicodeChars(removeRtfParagraphs(element.asXML()))); writeEntry("\\par"); // prune the current element to reduce memory element.detach(); element = null; if (m_entryCount % 1000 == 0) { debug("Entry " 
+ m_entryCount); } } }); Document document = reader.read(p_url); closeOutputFile(); info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'"); return m_filename; }
From source file:com.globalsight.everest.tm.util.ttx.TtxClean.java
License:Apache License
/** * Main method to call, returns the new filename of the result. *///w w w.j ava 2s . c o m public String cleanTtx(String p_url, boolean p_cleanTarget, String p_encoding) throws Exception { m_cleanTarget = p_cleanTarget; // File is called <file>.<ext>.<ttx> final String origName = getBaseName(p_url); final String baseName = getBaseName(origName); final String extension = getExtension(origName); info("Cleaning TTX file to " + (m_cleanTarget ? "target" : "source") + ": `" + p_url + "'"); m_entryCount = 0; // Reading from a file, need to use Xerces. SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); //reader.setEntityResolver(DtdResolver.getInstance()); //reader.setValidation(true); // Fetch the version info early. reader.addHandler("/TRADOStag", new ElementHandler() { public void onStart(ElementPath path) { Element element = path.getCurrent(); m_version = element.attributeValue(Ttx.VERSION); } public void onEnd(ElementPath path) { } }); // Fetch the header info early. reader.addHandler("/TRADOStag/FrontMatter", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); setOldHeader(element); } }); // Read in the entire file (it's not too big normally). Document document = reader.read(p_url); Element body = (Element) document.getRootElement().selectSingleNode("//Body/Raw"); // Remove <ut>, <df> and pull out one TUV. 
processBody(body); String content = getInnerText(body); String encoding; if (m_cleanTarget) { if (p_encoding != null) { encoding = p_encoding; } else { encoding = "UTF-8"; } } else { // reuse original encoding encoding = m_header.getOriginalEncoding(); } String locale; if (m_cleanTarget) { locale = m_header.getTargetLanguage(); } else { locale = m_header.getSourceLanguage(); } startOutputFile(baseName, locale, extension, encoding); writeEntry(content); closeOutputFile(); info("Result written to file `" + m_filename + "'."); return m_filename; }
From source file:com.globalsight.everest.tm.util.ttx.TtxToTmx.java
License:Apache License
/**
 * Main method to call, returns the new filename of the result.
 *
 * <p>Streams the TTX file through element handlers: the root handler
 * stores the version attribute, the FrontMatter handler passes the header
 * to setTtxHeader() and starts the output file, and the Tu handler writes
 * every translation unit (after cleanupTu()) via writeEntry(). Handled
 * FrontMatter and Tu elements are detached to keep memory use low.
 *
 * <p>Note that a failure to start the output file is reported through
 * error() and then terminates the JVM with exit status 1.
 *
 * @param p_url location of the TTX file, handed to {@code SAXReader.read}.
 * @return the value of m_filename (presumably set by startOutputFile()).
 * @throws Exception if reading the input or closing the output fails.
 */
public String convertTtxToTmx(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // NOTE(review): this value is never read in this method.
    final String extension = getExtension(baseName);

    info("Converting TTX file to TMX: `" + p_url + "'");

    m_entryCount = 0;

    // Fetch the version info early.
    ElementHandler versionHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_version = path.getCurrent().attributeValue(Ttx.VERSION);
        }

        public void onEnd(ElementPath path) {
        }
    };

    // Fetch the header info early.
    ElementHandler frontMatterHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element frontMatter = path.getCurrent();

            setTtxHeader(frontMatter);

            try {
                startOutputFile(baseName);
            } catch (Exception ex) {
                error(ex.toString());
                System.exit(1);
            }

            // prune the current element to reduce memory
            frontMatter.detach();
        }
    };

    // Counts each Tu when it opens and writes it out when it closes.
    ElementHandler tuHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;
        }

        public void onEnd(ElementPath path) {
            Element tu = cleanupTu(path.getCurrent());

            writeEntry(tu.asXML());

            // prune the current element to reduce memory
            tu.detach();

            if (m_entryCount % 50 == 0) {
                debug("Entry " + m_entryCount);
            }
        }
    };

    // Path handlers cannot use "//", sooo specify all known paths.
    final String[] tuPaths = {
        "/TRADOStag/Body/Raw/Tu",
        "/TRADOStag/Body/Raw/df/Tu",
        "/TRADOStag/Body/Raw/ut/Tu",
        "/TRADOStag/Body/Raw/df/ut/Tu",
    };

    // Reading from a file, need to use Xerces. No entity resolver or
    // validation is configured for TTX input.
    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
    reader.addHandler("/TRADOStag", versionHandler);
    reader.addHandler("/TRADOStag/FrontMatter", frontMatterHandler);
    for (String tuPath : tuPaths) {
        reader.addHandler(tuPath, tuHandler);
    }

    // All output is produced by the handlers while the file is parsed.
    reader.read(p_url);

    closeOutputFile();

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "'");

    return m_filename;
}
From source file:com.globalsight.everest.tm.util.Ttx.java
License:Apache License
/**
 * Populates this object's fields from the attributes of the
 * {@code ToolSettings} and {@code UserSettings} elements, which are located
 * with the XPath expressions {@code //ToolSettings} and
 * {@code //UserSettings} evaluated on {@code p_element}.
 *
 * <p>The creation date is parsed with UTC.parseNoSeparators() first and
 * with UTC.parse() if that yields {@code null}; it stays {@code null} when
 * the attribute is absent. Both elements are dereferenced without a null
 * check, so a missing element results in a NullPointerException.
 *
 * @param p_element element used as the XPath context.
 */
private void init(Element p_element) {
    Element toolSettings = (Element) p_element.selectSingleNode("//ToolSettings");

    Date creationDate = null;
    Attribute creationDateAttr = toolSettings.attribute(CREATIONDATE);
    if (creationDateAttr != null) {
        String rawDate = creationDateAttr.getValue();
        creationDate = UTC.parseNoSeparators(rawDate);
        if (creationDate == null) {
            creationDate = UTC.parse(rawDate);
        }
    }

    m_creationdate = creationDate;
    m_creationtool = toolSettings.attributeValue(CREATIONTOOL);
    m_creationtoolversion = toolSettings.attributeValue(CREATIONTOOLVERSION);

    Element userSettings = (Element) p_element.selectSingleNode("//UserSettings");

    m_datatype = userSettings.attributeValue(DATATYPE);
    m_o_encoding = userSettings.attributeValue(O_ENCODING);
    m_settingsname = userSettings.attributeValue(SETTINGSNAME);
    m_settingspath = userSettings.attributeValue(SETTINGSPATH);
    m_sourcelanguage = userSettings.attributeValue(SOURCELANGUAGE);
    m_targetlanguage = userSettings.attributeValue(TARGETLANGUAGE);
    m_targetdefaultfont = userSettings.attributeValue(TARGETDEFAULTFONT);
    m_sourcedocumentpath = userSettings.attributeValue(SOURCEDOCUMENTPATH);
}
From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java
License:Apache License
/** * Assigns word counts to all sub-flows in the segment. * //from w w w. j ava 2s. com * @ return true if has sub. */ private boolean countSubs(Element p_element) { int words; ArrayList elems = new ArrayList(); findSubElements(elems, p_element); for (int i = 0, max = elems.size(); i < max; i++) { Element sub = (Element) elems.get(i); if (!isSkipElement(sub)) { String subLocType = sub.attributeValue(DiplomatNames.Attribute.LOCTYPE); if (subLocType == null || subLocType.equals(DiplomatNames.Element.TRANSLATABLE)) { words = countWords(sub); } else { // Localizables count as 1 token or 0, depending on // the configuration (Diplomat.properties). words = m_localizableCount; } // Sub-flow word counts contribute to overall word count. m_totalWordCount += words; sub.addAttribute(DiplomatNames.Attribute.WORDCOUNT, String.valueOf(words)); } else { // Currently, this only affect the JavaScrpt embedded in the // HTML // Attribute. sub.addAttribute(DiplomatNames.Attribute.WORDCOUNT, "0"); } } return elems.size() > 0; }
From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java
License:Apache License
/** * Returns the string value of an element with tags representing whitespace * replaced by either whitespace or nbsps. *//*from w w w . j a v a2 s. c om*/ static public String getTextWithWhite(Element p_node, boolean... bs) { StringBuffer result = new StringBuffer(); List content = p_node.content(); for (int i = 0, max = content.size(); i < max; i++) { Node node = (Node) content.get(i); if (node.getNodeType() == Node.TEXT_NODE && bs.length == 0) { boolean isInternalText = isInternalText(content, i); if (!isInternalText) { result.append(node.getText()); } else { // add space around internal text result.append(" ").append(node.getText()).append(" "); } } else if (node.getNodeType() == Node.ELEMENT_NODE) { Element elem = (Element) node; String type = elem.attributeValue("type"); int childNodes = elem.content().size(); // For word counting, always treat TMX whitespace tags // as white. if (Text.isTmxWhitespaceNode(type) || Text.isTmxMsoWhitespaceNode(type)) { result.append(" "); } else { if (childNodes > 0) { boolean isExtract = false; for (int j = 0; j < childNodes; j++) { if (((Node) elem.content().get(j)).getNodeType() == Node.ELEMENT_NODE) { String s = ((Element) elem.content().get(j)).attributeValue("isTranslate"); String innerTextNodeIndex = ((Element) elem.content().get(j)) .attributeValue("innerTextNodeIndex"); if (s != null && Boolean.parseBoolean(s)) { isExtract = true; // getTextWithWhite((Element)elem.content().get(j), // true); // ((Element)elem.content().get(j)). // result.append(getTranslateInnerXml((Element) // elem.content().get(j))); } else { isExtract = false; } } else if (((Node) elem.content().get(j)).getNodeType() == Node.TEXT_NODE && isExtract) { result.append(((Node) elem.content().get(j)).getText()); } } } } } else { System.err.println("Please fix the word counter: " + node); } } return result.toString(); }
From source file:com.globalsight.ling.docproc.DiplomatWordCounter.java
License:Apache License
/**
 * Tells whether the node at index {@code i} sits directly between a
 * {@code bpt} element carrying {@code internal="yes"} and an {@code ept}
 * element. Element names and the attribute value are compared ignoring
 * case.
 *
 * @param content the sibling node list.
 * @param i index of the node to test.
 * @return true only if both neighbours exist, both are elements, and they
 * form such an internal bpt/ept pair; false otherwise.
 */
private static boolean isInternalText(List content, int i) {
    // The first and last entries lack a neighbour on one side.
    boolean atEdge = (i == 0) || (content.size() <= i + 1);
    if (atEdge) {
        return false;
    }

    Node before = (Node) content.get(i - 1);
    Node after = (Node) content.get(i + 1);

    if (before.getNodeType() != Node.ELEMENT_NODE) {
        return false;
    }
    if (after.getNodeType() != Node.ELEMENT_NODE) {
        return false;
    }

    Element opening = (Element) before;
    Element closing = (Element) after;

    return "bpt".equalsIgnoreCase(opening.getName())
            && "ept".equalsIgnoreCase(closing.getName())
            && "yes".equalsIgnoreCase(opening.attributeValue("internal"));
}