Usage examples for org.dom4j.Document.getRootElement()
Element getRootElement();
From source file:com.globalsight.everest.projecthandler.ProjectTmTuvT.java
License:Apache License
/**
 * Converts a segment string to a TMX {@code <seg>} element.
 *
 * The segment is parsed with getDom and the inner XML of its root element
 * (as produced by getInnerXml) is wrapped in {@code <seg>...</seg>} followed
 * by a CRLF. Any removal of {@code <sub>} elements is presumably done inside
 * getInnerXml; it is not visible in this method.
 *
 * TODO: output sub information as {@code <prop>}.
 *
 * @param p_segment the segment markup to convert
 * @return the wrapped segment text, terminated by "\r\n"
 */
private String convertToTmx(String p_segment) {
    Element segmentRoot = getDom(p_segment).getRootElement();

    return new StringBuilder("<seg>")
            .append(getInnerXml(segmentRoot))
            .append("</seg>\r\n")
            .toString();
}
From source file:com.globalsight.everest.segmentationhelper.XmlLoader.java
License:Apache License
public static SegmentationRule parseSegmentationRule(String p_xmlText) throws Exception { Document doc = parseWithSAX(p_xmlText); Element root = doc.getRootElement(); SegmentationRule segmentationRule = new SegmentationRule(); SrxHeader header = new SrxHeader(); HashMap<String, String> formathandle = new HashMap<String, String>(); HashMap<String, ArrayList<Rule>> langrules = new HashMap<String, ArrayList<Rule>>(); ArrayList<LanguageMap> langruleMap = new ArrayList<LanguageMap>(); segmentationRule.setRootName(root.getQualifiedName()); // Now it only supports SRX2.0. segmentationRule.setVersion(root.attributeValue("version")); Element headerElement = root.element("header"); Element bodyElement = root.element("body"); Element maprulesElement = bodyElement.element("maprules"); // to get SRX header informations if (headerElement != null) { // option segmentsubflows String segsub = headerElement.attributeValue("segmentsubflows"); if (segsub == null) { // Take default value. header.isSegmentsubflows(true); } else {/*from w ww . j av a 2 s .c o m*/ header.isSegmentsubflows(!segsub.equalsIgnoreCase("no")); } // option cascade String cascade = headerElement.attributeValue("cascade"); if (cascade == null) { header.isCascade(false); } else { header.isCascade(!cascade.equalsIgnoreCase("no")); } // elements : formathandle List formatList = headerElement.elements("formathandle"); Iterator formatIter = formatList != null ? 
formatList.iterator() : (new ArrayList()).iterator(); // If the header does not contain formathandle elements // we use the default values if (!formatIter.hasNext()) { formathandle.put("start", "no"); formathandle.put("end", "yes"); formathandle.put("isolated", "no"); } // If the header contains formathandle elements // we use the values specified by formathandle elements else { while (formatIter.hasNext()) { Element formatElement = (Element) formatIter.next(); String type = formatElement.attributeValue("type"); String include = formatElement.attributeValue("include"); formathandle.put(type, include); } } header.setFormatHandle(formathandle); // okpsrx extension Element okpOptions = headerElement.element("options"); Element okpSample = headerElement.element("sample"); Element okpRangeRule = headerElement.element("rangeRule"); if (okpOptions != null) { String oneSegmentIncludesAll = okpOptions.attributeValue("oneSegmentIncludesAll"); String trimLeadingWs = okpOptions.attributeValue("trimLeadingWhitespaces"); String trimTrailingWs = okpOptions.attributeValue("trimTrailingWhitespaces"); header.setOneSegmentIncludesAll("yes".equalsIgnoreCase(oneSegmentIncludesAll)); header.setTrimLeadingWhitespaces("yes".equalsIgnoreCase(trimLeadingWs)); header.setTrimTrailingWhitespaces("yes".equalsIgnoreCase(trimTrailingWs)); } if (okpSample != null) { String language = okpSample.attributeValue("language"); String useMappedRules = okpSample.attributeValue("useMappedRules"); String sample = okpSample.getText(); header.setSample(sample); header.setSampleLanguage(language); header.setUseMappedRulesForSample(!"no".equalsIgnoreCase(useMappedRules)); } if (okpRangeRule != null) { String rangeRule = okpRangeRule.getText(); header.setRangeRule(rangeRule); } } if (bodyElement != null) { Element languagerulesElement = bodyElement.element("languagerules"); if (languagerulesElement != null) { Iterator languageruleIter = languagerulesElement.elementIterator(); while (languageruleIter.hasNext()) { 
Element languageruleElement = (Element) languageruleIter.next(); String languageName = languageruleElement.attributeValue("languagerulename"); Iterator ruleIter = languageruleElement.elementIterator(); ArrayList<Rule> rules = new ArrayList<Rule>(); while (ruleIter.hasNext()) { Element ruleSub = (Element) ruleIter.next(); String breakvalue = ruleSub.attributeValue("break"); String beforebreak = ruleSub.elementText("beforebreak"); String afterbreak = ruleSub.elementText("afterbreak"); Rule rule = new Rule(); if (breakvalue == null) { // Take default value. rule.isBreak(true); } else { if (breakvalue.equalsIgnoreCase("no")) { rule.isBreak(false); } else { rule.isBreak(true); } } // System.out.println(rule.getBreak()); rule.setAfterBreak(afterbreak); rule.setBeforeBreak(beforebreak); rules.add(rule); } langrules.put(languageName, rules); } } // end languageruleElement } // end bodyElement if (maprulesElement != null) { Iterator languagemapIter = maprulesElement.elementIterator(); while (languagemapIter.hasNext()) { Element languagemapElement = (Element) languagemapIter.next(); String languagepattern = languagemapElement.attributeValue("languagepattern"); String languagerulename = languagemapElement.attributeValue("languagerulename"); LanguageMap langMap = new LanguageMap(); langMap.setLanguagePattern(languagepattern); langMap.setLanguageruleName(languagerulename); langruleMap.add(langMap); } } segmentationRule.setHeader(header); segmentationRule.setLanguageMap(langruleMap); segmentationRule.setRules(langrules); return segmentationRule; }
From source file:com.globalsight.everest.tm.exporter.TmxChecker.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" }) public String fixSegment(String segment) { Document dom = getDom(segment); Element root = dom.getRootElement(); Iterator ite = dtdMap.entrySet().iterator(); while (ite.hasNext()) { Map.Entry entry = (Map.Entry) ite.next(); String key = (String) entry.getKey(); ArrayList array = (ArrayList) entry.getValue(); String nodeName = "//" + key; List nodes = root.selectNodes(nodeName); for (int x = 0; x < nodes.size(); x++) { Element node = (Element) nodes.get(x); Attribute internalAttr = node.attribute("internal"); Attribute typeAttr = node.attribute("type"); ArrayList list = new ArrayList(); list.addAll(node.attributes()); resetXAttribute(key, list);/*from ww w. jav a 2 s. c o m*/ for (int y = 0; y < list.size(); y++) { Attribute temp = (Attribute) list.get(y); String name = temp.getName(); if (!array.contains(name)) { node.remove(temp); } } // GBS-3537 & GBS-3691 if (internalAttr != null && "yes".equalsIgnoreCase(internalAttr.getValue())) { String exportedType = "x-internal"; if (typeAttr != null) { String type = typeAttr.getValue(); if (StringUtil.isNotEmpty(type)) { exportedType += "-" + type.trim().toLowerCase(); } typeAttr.setValue(exportedType); } else { node.add(new DefaultAttribute("type", exportedType)); } } } } return root.asXML(); }
From source file:com.globalsight.everest.tm.exporter.TmxChecker.java
License:Apache License
/** * When export TM, "internal='yes' type='style'" will be merged to * "type='x-internal-style'"; When import back, need revert back. * * @param segment/*from w w w .j a v a 2s . c om*/ * @return */ public String revertInternalTag(String segment) { Document dom = getDom(segment); Element root = dom.getRootElement(); for (String tag : dtdMap.keySet()) { String nodeName = "//" + tag; List nodes = root.selectNodes(nodeName); for (int x = 0; x < nodes.size(); x++) { Element node = (Element) nodes.get(x); if (node.attribute("type") != null && node.attribute("type").getValue() != null) { Attribute typeAttr = node.attribute("type"); String type = typeAttr.getValue(); if ("x-internal".equalsIgnoreCase(type)) { node.remove(typeAttr); node.add(new DefaultAttribute("internal", "yes")); } else if (type.startsWith("x-internal")) { String realTypeValue = type.substring("x-internal".length() + 1); typeAttr.setValue(realTypeValue); node.add(new DefaultAttribute("internal", "yes")); } } } } return root.asXML(); }
From source file:com.globalsight.everest.tm.exporter.TmxWriter.java
License:Apache License
/** * Converts the native information in a TUV to what we want to output * according to the TMX export level. This means we strip out internal tags * for level 1, and convert tags to Trados form if we need to. *///from w w w. ja v a2s. com public static SegmentTmTuv convertTuvToTmxLevel(SegmentTmTu p_tu, SegmentTmTuv p_tuv, int p_level) { String format = p_tu.getFormat(); Document dom = getDom(p_tuv.getSegment()); Element root = dom.getRootElement(); // For any non-native format, remove all non-TMX attributes. // We output the tmx-gs.dtd (in ../util) for NATIVE. if (p_level != TMX_LEVEL_NATIVE) { removeNonTmxAttributes(root); } // TMX Level 1 does not contain any internal tags. if (p_level == TMX_LEVEL_1) { replaceNbsps(root); removeNodes(root, "//bpt"); removeNodes(root, "//ept"); removeNodes(root, "//ph"); removeNodes(root, "//it"); removeNodes(root, "//ut"); removeNodes(root, "//hi"); } // TMX_LEVEL_2, Native G-TMX, TMX_LEVEL_TRADOS else { // TMX Compliance: output formatting tags like <bpt type="bold"/> // with an HTML code inside. if (format.equalsIgnoreCase("html")) { injectStandardFormattingCodes(root); } // Remove any SUB tags. removeSubElements(root); } p_tuv.setSegment(root.asXML()); return p_tuv; }
From source file:com.globalsight.everest.tm.exporter.TmxWriter.java
License:Apache License
/** * Convert a segment string to TMX by removing <sub> elements. * /*from w ww. java2 s . c o m*/ * TODO: output sub information as <prop>. */ private static String convertToTmx(String p_segment, OutputFormat outputFormat) { StringBuffer result = new StringBuffer(); Document dom = getDom(p_segment); result.append("<seg>"); result.append(getInnerXml(dom.getRootElement(), outputFormat)); result.append("</seg>\r\n"); return result.toString(); }
From source file:com.globalsight.everest.tm.exporter.TmxWriter.java
License:Apache License
/** * Fixes the value of the "i" attribute across TUVs. In TM2, "i" is unique * across all TUVs, for TMX compliance thes "i" linked by "x" must be the * same. Furthermore, "x" numbering must start at 1. *//* ww w.jav a 2s .co m*/ private void fixAttributeIX(SegmentTmTuv p_sourceTuv, List<BaseTmTuv> p_tuvs) { ArrayList<Element> roots = new ArrayList<Element>(); for (int i = 0; i < p_tuvs.size(); i++) { SegmentTmTuv tuv = (SegmentTmTuv) p_tuvs.get(i); Document dom = getDom(tuv.getSegment()); roots.add(dom.getRootElement()); } Element sroot = getDom(p_sourceTuv.getSegment()).getRootElement(); fixAttributeIX(sroot, roots); // Save the modified segments back into the tuvs. p_sourceTuv.setSegment(sroot.asXML()); for (int i = 0; i < p_tuvs.size(); i++) { SegmentTmTuv tuv = (SegmentTmTuv) p_tuvs.get(i); Element root = (Element) roots.get(i); tuv.setSegment(root.asXML()); } }
From source file:com.globalsight.everest.tm.util.trados.TradosFmSgmlTmxToGxml.java
License:Apache License
/** * Main method to call, returns the new filename of the result. *///from w w w .j a v a 2 s. com public String convertToGxml(String p_url) throws Exception { final String baseName = getBaseName(p_url); final String extension = getExtension(p_url); info("Converting TMX file to GXML: `" + p_url + "'"); startOutputFile(baseName); m_entryCount = 0; // Reading from a file, need to use Xerces. SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); reader.setEntityResolver(DtdResolver.getInstance()); reader.setValidation(true); // enable element complete notifications to conserve memory reader.addHandler("/tmx", new ElementHandler() { final public void onStart(ElementPath path) { Element element = path.getCurrent(); m_version = element.attributeValue("version"); } final public void onEnd(ElementPath path) { } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/header", new ElementHandler() { final public void onStart(ElementPath path) { } final public void onEnd(ElementPath path) { Element element = path.getCurrent(); setOldHeader(element); createNewHeader(); // prune the current element to reduce memory element.detach(); element = null; } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu", new ElementHandler() { final public void onStart(ElementPath path) { ++m_entryCount; m_tuError = false; } final public void onEnd(ElementPath path) { Element element = path.getCurrent(); if (m_tuError) { m_errorCount++; } else { writeEntry(element.asXML()); } // prune the current element to reduce memory element.detach(); element = null; if (m_entryCount % 1000 == 0) { debug("Entry " + m_entryCount); } } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() { final public void onStart(ElementPath path) { } final public void onEnd(ElementPath path) { Element element = 
path.getCurrent(); try { String gxml = handleTuv(element); Document doc = parse("<root>" + gxml + "</root>"); // Remove old content of seg List content = element.content(); for (int i = content.size() - 1; i >= 0; --i) { ((Node) content.get(i)).detach(); } // Add new GXML content (backwards) content = doc.getRootElement().content(); Collections.reverse(content); for (int i = content.size() - 1; i >= 0; --i) { Node node = (Node) content.get(i); element.add(node.detach()); } } catch (Throwable ex) { m_tuError = true; } } }); Document document = reader.read(p_url); closeOutputFile(); info("Processed " + m_entryCount + " TUs " + "into file `" + m_filename + "', " + m_errorCount + " errors."); return m_filename; }
From source file:com.globalsight.everest.tm.util.trados.TradosFmTmxToGxml.java
License:Apache License
/** * Main method to call, returns the new filename of the result. *//* ww w . j a va 2 s. com*/ public String convertToGxml(String p_url) throws Exception { final String baseName = getBaseName(p_url); final String extension = getExtension(p_url); info("Converting TMX file to GXML: `" + p_url + "'"); startOutputFile(baseName); m_entryCount = 0; // Reading from a file, need to use Xerces. SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); reader.setEntityResolver(DtdResolver.getInstance()); reader.setValidation(true); // enable element complete notifications to conserve memory reader.addHandler("/tmx", new ElementHandler() { public void onStart(ElementPath path) { Element element = path.getCurrent(); m_version = element.attributeValue("version"); } public void onEnd(ElementPath path) { } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/header", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); setOldHeader(element); createNewHeader(); // prune the current element to reduce memory element.detach(); element = null; } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; m_tuError = false; } public void onEnd(ElementPath path) { Element element = path.getCurrent(); if (m_tuError) { m_errorCount++; } else { writeEntry(element.asXML()); } // prune the current element to reduce memory element.detach(); element = null; if (m_entryCount % 1000 == 0) { debug("Entry " + m_entryCount); } } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); try { String gxml = handleTuv(element); 
Document doc = parse("<root>" + gxml + "</root>"); // Remove old content of seg List content = element.content(); for (int i = content.size() - 1; i >= 0; --i) { ((Node) content.get(i)).detach(); } // Add new GXML content (backwards) content = doc.getRootElement().content(); Collections.reverse(content); for (int i = content.size() - 1; i >= 0; --i) { Node node = (Node) content.get(i); element.add(node.detach()); } } catch (Throwable ex) { m_tuError = true; } } }); Document document = reader.read(p_url); closeOutputFile(); info("Processed " + m_entryCount + " TUs " + "into file `" + m_filename + "', " + m_errorCount + " errors."); return m_filename; }
From source file:com.globalsight.everest.tm.util.trados.TradosHtmlTmxToGxml.java
License:Apache License
/**
 * Main method to call, returns the new filename of the result.
 *
 * The TMX file is read with a validating Xerces SAX parser, and element
 * handlers process it incrementally: the version is taken from
 * {@code /tmx}, the header is converted once it is complete, each
 * {@code seg} is passed through removeUtElements and has its content
 * replaced by the GXML that handleTuv produces from its text, and each
 * completed {@code tu} is written out (or counted as an error) and then
 * detached to conserve memory.
 *
 * @param p_url location of the TMX file to convert
 * @return the name of the output file (m_filename)
 * @throws Exception if reading or parsing the file fails; the output file
 *         is closed before the exception propagates
 */
public String convertToGxml(String p_url) throws Exception {
    final String baseName = getBaseName(p_url);
    // NOTE(review): extension is never used below; kept in case
    // getExtension validates p_url - confirm and remove.
    final String extension = getExtension(p_url);

    info("Converting TMX file to GXML: `" + p_url + "'");

    startOutputFile(baseName);

    // Everything after opening the output runs under try/finally so the file
    // is closed even when parsing fails.
    try {
        m_entryCount = 0;

        // Reading from a file, need to use Xerces.
        SAXReader reader = new SAXReader();
        reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");
        reader.setEntityResolver(DtdResolver.getInstance());
        reader.setValidation(true);

        // enable element complete notifications to conserve memory
        reader.addHandler("/tmx", new ElementHandler() {
            @Override
            public void onStart(ElementPath path) {
                Element element = path.getCurrent();
                m_version = element.attributeValue("version");
            }

            @Override
            public void onEnd(ElementPath path) {
            }
        });

        // enable element complete notifications to conserve memory
        reader.addHandler("/tmx/header", new ElementHandler() {
            @Override
            public void onStart(ElementPath path) {
            }

            @Override
            public void onEnd(ElementPath path) {
                Element element = path.getCurrent();

                setOldHeader(element);
                createNewHeader();

                // prune the current element to reduce memory
                element.detach();
            }
        });

        // enable element complete notifications to conserve memory
        reader.addHandler("/tmx/body/tu", new ElementHandler() {
            @Override
            public void onStart(ElementPath path) {
                ++m_entryCount;
                m_tuError = false;
            }

            @Override
            public void onEnd(ElementPath path) {
                Element element = path.getCurrent();

                // m_tuError is raised by the seg handler when a conversion failed.
                if (m_tuError) {
                    m_errorCount++;
                } else {
                    writeEntry(element.asXML());
                }

                // prune the current element to reduce memory
                element.detach();

                if (m_entryCount % 1000 == 0) {
                    debug("Entry " + m_entryCount);
                }
            }
        });

        // enable element complete notifications to conserve memory
        reader.addHandler("/tmx/body/tu/tuv/seg", new ElementHandler() {
            @Override
            public void onStart(ElementPath path) {
            }

            @Override
            public void onEnd(ElementPath path) {
                Element element = path.getCurrent();

                try {
                    // NOTE(review): the rest of this handler edits whatever
                    // removeUtElements returns; presumably that is the same
                    // seg element still attached to the tree - confirm.
                    element = removeUtElements(element);

                    String gxml = handleTuv(element.getText());
                    Document doc = parse("<root>" + gxml + "</root>");

                    // Remove old content of seg
                    List content = element.content();
                    for (int i = content.size() - 1; i >= 0; --i) {
                        ((Node) content.get(i)).detach();
                    }

                    // Add new GXML content (backwards): the list is reversed
                    // and then walked from its tail, so nodes are appended to
                    // seg in their original order.
                    content = doc.getRootElement().content();
                    Collections.reverse(content);
                    for (int i = content.size() - 1; i >= 0; --i) {
                        Node node = (Node) content.get(i);
                        element.add(node.detach());
                    }
                } catch (Throwable ex) {
                    // Deliberate best effort: flag the enclosing TU so it is
                    // counted as an error instead of aborting the whole file.
                    m_tuError = true;
                }
            }
        });

        reader.read(p_url);
    } finally {
        closeOutputFile();
    }

    info("Processed " + m_entryCount + " TUs into file `" + m_filename + "', " + m_errorCount
            + " errors.");

    return m_filename;
}