List of usage examples for org.dom4j Node getNodeType
short getNodeType();
From source file:revaligner.service.FileAligner.java
public ArrayList<String[]> populateSourceTxlf() throws Exception { System.out.println("populating source txlf with aligned segments...."); ArrayList<String[]> reportStates = new ArrayList(); ExtractionSupportImpl extractionSupportImpl = new ExtractionSupportImpl( Locale.makeLocale(this.sourcelanguage), Locale.makeLocale(this.targetlanguage)); Configuration config = new BaseConfiguration(); config.setProperty("extraction.tokens.extract", "all"); extractionSupportImpl.setConfiguration(config); Locale locale = Locale.makeLocale(this.sourcelanguage); TradosWordCounter wcounter = new TradosWordCounter(locale, config); org.dom4j.Document document_src = XmlParser.parseXmlFile(this.sourcetxlf_nonSeg); org.dom4j.Element root_src = document_src.getRootElement(); org.dom4j.Document document_src_ingt = XmlParser.parseXmlFile(this.sourcetxlf_nonSeg); org.dom4j.Element root_src_ingt = document_src_ingt.getRootElement(); org.dom4j.Document document_src_seg = XmlParser.parseXmlFile(this.sourcetxlf_seg); org.dom4j.Element root_src_seg = document_src_seg.getRootElement(); List<com.aspose.words.Node> list_source = root_src .selectNodes("//*[name() = 'group'][@restype = 'x-paragraph']"); List<com.aspose.words.Node> list_source_ingt = root_src_ingt .selectNodes("//*[name() = 'group'][@restype = 'x-paragraph']"); List<com.aspose.words.Node> list_source_seg = root_src_seg .selectNodes("//*[name() = 'group'][@restype = 'x-paragraph']"); int count = 0; int totalWC = 0; org.dom4j.Document document = XmlParser.parseXmlFile(this.alignedfile); List<org.dom4j.Element> groups = document.getRootElement().element("aligned").elements("group"); for (int i = 0; i < groups.size(); i++) { org.dom4j.Element group = (org.dom4j.Element) groups.get(i); List<org.dom4j.Element> units = group.elements("unit"); if (((org.dom4j.Element) units.get(0)).element("src_para") != null) { boolean isParaAllSegmented = true; for (int j = 0; j < units.size(); j++) { if (((org.dom4j.Element) 
units.get(j)).attributeValue("alignsegs").equals("false")) { isParaAllSegmented = false; break; }//from w ww. j av a 2s . co m } String srcTextAccepted = group.elementText("text").replaceAll("(?s)<del>.*?</del>", "") .replaceAll("<(/)*ins>", ""); if (!extractionSupportImpl.isExtractable(srcTextAccepted)) { if (isParaAllSegmented) { for (int j = 0; j < units.size(); j++) { org.dom4j.Element unit = (org.dom4j.Element) units.get(j); List<org.dom4j.Element> srcsegs = unit.element("src_para").element("segments") .elements("src_seg"); List<org.dom4j.Element> trgsegs = unit.element("trg_para").element("segments") .elements("trg_seg"); for (int x = 0; x < srcsegs.size(); x++) { String[] s = new String[7]; s[0] = ((org.dom4j.Element) srcsegs.get(x)).getText(); if (x >= trgsegs.size()) { s[1] = ""; } else { org.dom4j.Element trgseg = (org.dom4j.Element) trgsegs.get(x); String id = trgseg.attributeValue("id"); if (id.startsWith("n - ")) { s[1] = trgseg.getText(); } else { List tmp_contents = new ArrayList(); if (id.contains(" - ")) { int start = Integer.parseInt(id.split(" - ")[0]); int end = Integer.parseInt(id.split(" - ")[1]); tmp_contents.addAll( (Collection) this.txlftrgsegmap.get(Integer.valueOf(start))); for (int su = start + 1; su <= end; su++) { boolean isprevendofpara = ((boolean[]) this.txlftrgsewsmap .get(Integer.valueOf(su - 1)))[1]; boolean iscurrentstartofpara = ((boolean[]) this.txlftrgsewsmap .get(Integer.valueOf(su)))[0]; if ((isprevendofpara) && (iscurrentstartofpara)) { List prevseg = (List) this.txlftrgsegmap .get(Integer.valueOf(su - 1)); int previdx = -1; for (int prev = 0; prev < prevseg.size(); prev++) { org.dom4j.Node prevnode = (org.dom4j.Node) prevseg .get(prev); if (prevnode.getNodeType() == 1) { org.dom4j.Element prevnode_e = (org.dom4j.Element) prevnode; if ((prevnode_e.getName().equals("ws")) && (prevnode_e .attributeValue("pos").equals("after"))) { previdx = prevseg.size() - prev; } } } if (previdx != -1) { 
tmp_contents.remove(tmp_contents.size() - previdx); } List currseg = (List) this.txlftrgsegmap .get(Integer.valueOf(su)); int curridx = -1; for (int curr = 0; curr < currseg.size(); curr++) { org.dom4j.Node currnode = (org.dom4j.Node) currseg .get(curr); if (currnode.getNodeType() == 1) { org.dom4j.Element currnode_e = (org.dom4j.Element) currnode; if ((currnode_e.getName().equals("ws")) && (currnode_e .attributeValue("pos").equals("before"))) { curridx = curr; } } } if (curridx != -1) { currseg.remove(curridx); } if (Locale.makeLocale(this.targetlanguage).isFarEast()) { tmp_contents.addAll(currseg); } else { tmp_contents.add(DocumentHelper.createText(" ")); tmp_contents.addAll(currseg); } } else { tmp_contents.addAll((Collection) this.txlftrgsegmap .get(Integer.valueOf(su))); } } } else { tmp_contents.addAll((Collection) this.txlftrgsegmap .get(Integer.valueOf(Integer.parseInt(id)))); } s[1] = trimText(assembleText(tmp_contents).replace("<br> ", "↵<br>"), false)[0]; } } s[2] = "N/A"; s[3] = "N/A"; s[4] = ((org.dom4j.Element) srcsegs.get(x)).attributeValue("tctype"); s[5] = "0"; s[6] = ""; reportStates.add(s); } } } else { String[] s = new String[7]; for (int j = 0; j < units.size(); j++) { s[0] = ((org.dom4j.Element) units.get(j)).element("src_para").elementText("text"); if (((org.dom4j.Element) units.get(j)).element("trg_para") != null) { s[1] = ((org.dom4j.Element) units.get(j)).element("trg_para").elementText("text"); } else { s[1] = ""; } s[2] = "N/A"; s[3] = "N/A"; s[4] = ((org.dom4j.Element) units.get(j)).element("src_para").attributeValue("tctype"); s[5] = "0"; s[6] = ""; reportStates.add(s); } } } else { if (isParaAllSegmented) { org.dom4j.Element txlf_group = (org.dom4j.Element) list_source.get(count); org.dom4j.Element txlf_group_ingt = (org.dom4j.Element) list_source_ingt.get(count); org.dom4j.Element txlf_group_seg = (org.dom4j.Element) list_source_seg.get(count); txlf_group.setContent(txlf_group_seg.content()); List transunits = 
txlf_group.elements("trans-unit"); txlf_group_ingt.setContent(txlf_group_seg.content()); List transunits_ingt = txlf_group_ingt.elements("trans-unit"); ArrayList<String> mergedsegtext = new ArrayList(); ArrayList<List> merged_trg_contents = new ArrayList(); ArrayList<String> mergedsegtctypes = new ArrayList(); ArrayList<String> keys = new ArrayList(); ArrayList<String> key_left = new ArrayList(); ArrayList<String> key_right = new ArrayList(); ArrayList<String> org_keys = new ArrayList(); ArrayList<String> trg_keys = new ArrayList(); ArrayList<List> trg_contents = new ArrayList(); ArrayList<String> src_tctypes = new ArrayList(); ArrayList<String> src_review_stats = new ArrayList(); ArrayList<String> src_ignore_stats = new ArrayList(); ArrayList<Integer> edited_idx = new ArrayList(); for (int j = 0; j < units.size(); j++) { org.dom4j.Element unit = (org.dom4j.Element) units.get(j); org.dom4j.Element src_para = unit.element("src_para"); org.dom4j.Element trg_para = unit.element("trg_para"); List src_segs = src_para.element("segments").elements("src_seg"); for (int z = 0; z < src_segs.size(); z++) { org.dom4j.Element src_seg = (org.dom4j.Element) src_segs.get(z); src_tctypes.add(src_seg.attributeValue("tctype")); src_review_stats.add(src_seg.attributeValue("needreview")); src_ignore_stats.add(src_seg.attributeValue("ignored")); keys.add(src_seg.getText().replaceAll("(?s)<del>.*?</del>", "") .replaceAll("<(/)*ins>", "").replace("<br>", "").trim()); org_keys.add(src_seg.getText()); if (trg_para != null) { List trg_segs = trg_para.element("segments").elements("trg_seg"); if (((org.dom4j.Element) trg_segs.get(z)).attributeValue("edited") .equals("true")) { edited_idx.add(Integer.valueOf(trg_contents.size())); } if (trg_segs.size() > z) { trg_keys.add(((org.dom4j.Element) trg_segs.get(z)).getText()); String id = ((org.dom4j.Element) trg_segs.get(z)).attributeValue("id"); if (id.startsWith("n - ")) { trg_contents.add(new ArrayList()); } else { List tmp_contents = new 
ArrayList(); if (id.contains(" - ")) { int start = Integer.parseInt(id.split(" - ")[0]); int end = Integer.parseInt(id.split(" - ")[1]); tmp_contents.addAll((Collection) this.txlftrgsegmap .get(Integer.valueOf(start))); for (int su = start + 1; su <= end; su++) { boolean isprevendofpara = ((boolean[]) this.txlftrgsewsmap .get(Integer.valueOf(su - 1)))[1]; boolean iscurrentstartofpara = ((boolean[]) this.txlftrgsewsmap .get(Integer.valueOf(su)))[0]; if ((isprevendofpara) && (iscurrentstartofpara)) { List prevseg = (List) this.txlftrgsegmap .get(Integer.valueOf(su - 1)); int previdx = -1; for (int prev = 0; prev < prevseg.size(); prev++) { org.dom4j.Node prevnode = (org.dom4j.Node) prevseg .get(prev); if (prevnode.getNodeType() == 1) { org.dom4j.Element prevnode_e = (org.dom4j.Element) prevnode; if ((prevnode_e.getName().equals("ws")) && (prevnode_e.attributeValue("pos") .equals("after"))) { previdx = prevseg.size() - prev; } } } if (previdx != -1) { tmp_contents.remove(tmp_contents.size() - previdx); } List currseg = (List) this.txlftrgsegmap .get(Integer.valueOf(su)); int curridx = -1; for (int curr = 0; curr < currseg.size(); curr++) { org.dom4j.Node currnode = (org.dom4j.Node) currseg .get(curr); if (currnode.getNodeType() == 1) { org.dom4j.Element currnode_e = (org.dom4j.Element) currnode; if ((currnode_e.getName().equals("ws")) && (currnode_e.attributeValue("pos") .equals("before"))) { curridx = curr; } } } if (curridx != -1) { currseg.remove(curridx); } if (Locale.makeLocale(this.targetlanguage).isFarEast()) { tmp_contents.addAll(currseg); } else { tmp_contents.add(DocumentHelper.createText(" ")); tmp_contents.addAll(currseg); } } else { tmp_contents.addAll((Collection) this.txlftrgsegmap .get(Integer.valueOf(su))); } } } else { tmp_contents.addAll((Collection) this.txlftrgsegmap .get(Integer.valueOf(Integer.parseInt(id)))); } trg_contents.add(tmp_contents); } } else { trg_keys.add(""); trg_contents.add(new ArrayList()); } } else { trg_keys.add(""); 
trg_contents.add(new ArrayList()); } if ((z == 0) && (z == src_segs.size() - 1)) { key_left.add(src_para.attributeValue("lefttrim")); key_right.add(src_para.attributeValue("righttrim")); } else if (z == 0) { key_left.add(src_para.attributeValue("lefttrim")); key_right.add("true"); } else if (z == src_segs.size() - 1) { key_left.add("true"); key_right.add(src_para.attributeValue("righttrim")); } else { key_left.add("true"); key_right.add("true"); } } } SegmenterFactory factory = new SegmenterFactory(); Configuration segconfig = createConfigForSegmenter(false, this.sourcelanguage); Segmenter segmenter = factory.getSegmenter("trados", Locale.makeLocale(this.sourcelanguage), segconfig); List<String> finsegs = segmenter .segment(group.elementText("text").replaceAll("(?s)<del>.*?</del>", "") .replaceAll("<(/)*ins>", "").replace("<br>", "").replace("<", "<") .replace(">", ">").replace("&", "&")); ArrayList<ArrayList<Integer>> indices = new ArrayList(); int key_start_index = 0; for (int k = 0; k < finsegs.size(); k++) { String finsegtext = ((String) finsegs.get(k)).replace("&", "&").replace("<", "<") .replace(">", ">"); String combined_key = ""; ArrayList<Integer> indice = new ArrayList(); for (int x = key_start_index; x < keys.size(); x++) { combined_key = combined_key + (String) keys.get(x); indice.add(Integer.valueOf(x)); if (combined_key.replace("", " ").trim().replaceAll("(\\s)+", "") .equals(finsegtext.replace("", " ").trim().replaceAll("(\\s)+", ""))) { indices.add(indice); key_start_index = x + 1; break; } } } ArrayList<Integer> merged_edited_idx = new ArrayList(); ArrayList<String[]> statss = new ArrayList(); for (int m = 0; m < indices.size(); m++) { boolean iscontentsuseable = true; ArrayList<Integer> temp_indice = (ArrayList) indices.get(m); String temp_src = ""; String temp_org_src = ""; String temp_trg = ""; List temp_trg_content = new ArrayList(); int id = 1; int rid = 1; int bxrid = 1; int bptrid = 1; int bxid = 1; int bptid = 1; HashMap<String, String> 
map_rid = new HashMap(); String temp_tctype = (String) src_tctypes .get(((Integer) temp_indice.get(0)).intValue()); String temp_review_stats = (String) src_review_stats .get(((Integer) temp_indice.get(0)).intValue()); for (Iterator localIterator = temp_indice.iterator(); localIterator.hasNext();) { int it = ((Integer) localIterator.next()).intValue(); temp_tctype = temp_tctype.equals(src_tctypes.get(it)) ? temp_tctype : "MIX"; temp_review_stats = ((String) src_review_stats.get(it)).equals("true") ? "true" : temp_review_stats.equals("true") ? "true" : "false"; String temp_ignore_stats = (String) src_ignore_stats.get(it); if (edited_idx.contains(Integer.valueOf(it))) { iscontentsuseable = false; } temp_src = temp_src + (String) keys.get(it); temp_org_src = temp_org_src + (String) org_keys.get(it); if (temp_ignore_stats.equals("true")) { temp_trg = temp_trg + "[skipseg]"; temp_trg_content.add(DocumentHelper.createText("[skipseg]")); } else { temp_trg = temp_trg + (String) trg_keys.get(it); List trg_content = (List) trg_contents.get(it); for (int nc = 0; nc < trg_content.size(); nc++) { org.dom4j.Node raw = (org.dom4j.Node) trg_content.get(nc); if (raw.getNodeType() == 3) { temp_trg_content.add(raw); } else if (raw.getNodeType() == 1) { org.dom4j.Element rawe = (org.dom4j.Element) raw; if (rawe.getName().equals("source")) { for (int ncc = 0; ncc < rawe.content().size(); ncc++) { org.dom4j.Node node = (org.dom4j.Node) rawe.content().get(ncc); if (node.getNodeType() == 3) { temp_trg_content.add(node); } else if (node.getNodeType() == 1) { org.dom4j.Element e = (org.dom4j.Element) node; if (!e.getName().equals("x")) { if (!e.getName().equals("ph")) { if (e.getName().equals("bx")) { if ((e.attribute("fake") != null) && (e.attributeValue("fake") .equals("true")) && (temp_indice.indexOf( Integer.valueOf(it)) != 0)) { continue; } if ((e.attribute("fake") == null) || (!e.attributeValue("fake") .equals("true"))) { } } else if (e.getName().equals("ex")) { if 
((e.attribute("fake") != null) && (e.attributeValue("fake") .equals("true")) && (temp_indice.indexOf(Integer.valueOf( it)) != temp_indice.size() - 1)) { continue; } if ((e.attribute("fake") == null) || (!e.attributeValue("fake") .equals("true"))) { } } else if (e.getName().equals("bpt")) { if ((e.attribute("fake") != null) && (e.attributeValue("fake") .equals("true")) && (temp_indice.indexOf( Integer.valueOf(it)) != 0)) { continue; } if ((e.attribute("fake") == null) || (!e.attributeValue("fake") .equals("true"))) { } } else if (e.getName().equals("ept")) { if ((e.attribute("fake") != null) && (e.attributeValue("fake") .equals("true")) && (temp_indice.indexOf(Integer.valueOf( it)) != temp_indice.size() - 1)) { continue; } if ((e.attribute("fake") == null) || (!e.attributeValue("fake") .equals("true"))) { } } } } if (e.attribute("fake") != null) { e.remove(e.attribute("fake")); } temp_trg_content.add(e); } } } else if (rawe.getName().equals("ws")) { String pos = rawe.attributeValue("pos"); if (pos.equals("before")) { for (int ncc = 0; ncc < rawe.content().size(); ncc++) { org.dom4j.Node node = (org.dom4j.Node) rawe.content() .get(ncc); if (node.getNodeType() == 3) { temp_trg_content.add(0, node); } else if (node.getNodeType() == 1) { org.dom4j.Element e = (org.dom4j.Element) node; if ((!e.getName().equals("x")) && (e.getName().equals("it"))) { if (e.attributeValue("pos").equals("open")) { if ((e.attribute("fake") != null) && (e.attributeValue("fake") .equals("true")) && (temp_indice.indexOf( Integer.valueOf(it)) != 0)) { continue; } if (e.getText().equals("")) { e.setName("bx"); } else { e.setName("bpt"); } if ((e.attribute("fake") == null) || (!e.attributeValue("fake") .equals("true"))) { } } else if (e.attributeValue("pos") .equals("close")) { if ((e.attribute("fake") != null) && (e.attributeValue("fake") .equals("true")) && (temp_indice.indexOf(Integer.valueOf( it)) != temp_indice.size() - 1)) { continue; } if (e.getText().equals("")) { e.setName("ex"); } else { 
e.setName("ept"); } e.remove(e.attribute("ctype")); if ((e.attribute("fake") == null) || (!e.attributeValue("fake") .equals("true"))) { } } e.remove(e.attribute("pos")); } else { if (e.attribute("fake") != null) { e.remove(e.attribute("fake")); } temp_trg_content.add(0, e); } } } } else if (pos.equals("after")) { for (int ncc = 0; ncc < rawe.content().size(); ncc++) { org.dom4j.Node node = (org.dom4j.Node) rawe.content() .get(ncc); if (node.getNodeType() == 3) { temp_trg_content.add(node); } else if (node.getNodeType() == 1) { org.dom4j.Element e = (org.dom4j.Element) node; if ((!e.getName().equals("x")) && (e.getName().equals("it"))) { if (e.attributeValue("pos").equals("open")) { if ((e.attribute("fake") != null) && (e.attributeValue("fake") .equals("true")) && (temp_indice.indexOf( Integer.valueOf(it)) != 0)) { continue; } if (e.getText().equals("")) { e.setName("bx"); } else { e.setName("bpt"); } if ((e.attribute("fake") == null) || (!e.attributeValue("fake") .equals("true"))) { } } else if (e.attributeValue("pos") .equals("close")) { if ((e.attribute("fake") != null) && (e.attributeValue("fake") .equals("true")) && (temp_indice.indexOf(Integer.valueOf( it)) != temp_indice.size() - 1)) { continue; } if (e.getText().equals("")) { e.setName("ex"); } else { e.setName("ept"); } e.remove(e.attribute("ctype")); if ((e.attribute("fake") == null) || (!e.attributeValue("fake") .equals("true"))) { } } e.remove(e.attribute("pos")); } else { if (e.attribute("fake") != null) { e.remove(e.attribute("fake")); } temp_trg_content.add(e); } } } } } } } } } String[] stats = TrackChangeHelper.getTxlfTrgStatsFromTCType(temp_tctype, temp_trg); if ((stats[0].equals("1")) && (temp_review_stats.equals("true"))) { stats[2] = "fuzzy-match"; } String[] s = new String[7]; s[0] = temp_org_src.replace("<br> ", "↵<br>"); if (iscontentsuseable) { s[1] = trimText(assembleText(temp_trg_content).replace("<br> ", "↵<br>"), false)[0]; } else { s[1] = temp_trg.replace("<br> ", "↵<br>"); } if 
(s[1].contains("[skipseg]")) { if (s[1].replace("[skipseg]", "").trim().equals("")) { s[1] = ""; temp_trg_content = new ArrayList(); temp_trg_content.add(DocumentHelper.createText("")); temp_trg = ""; stats[0] = "1"; stats[1] = "translated"; stats[2] = "exact-match"; } else { s[1] = s[1].replace("[skipseg]", ""); temp_trg_content = replacetextinDomObj(temp_trg_content); temp_trg = temp_trg.replace("[skipseg]", ""); } } s[2] = stats[0]; s[3] = stats[2]; s[4] = temp_tctype; wcounter = new TradosWordCounter(locale, config); wcounter.countText(((org.dom4j.Element) transunits.get(m)).element("source").getText()); s[5] = Integer.toString(wcounter.getWordCount()); s[6] = ""; totalWC += wcounter.getWordCount(); reportStates.add(s); if (extractionSupportImpl.isExtractable(temp_src)) { mergedsegtext.add(temp_trg); if (!iscontentsuseable) { merged_edited_idx.add(Integer.valueOf(merged_trg_contents.size())); } merged_trg_contents.add(temp_trg_content); mergedsegtctypes.add(temp_tctype); statss.add(stats); } } for (int t = 0; t < transunits.size(); t++) { org.dom4j.Element trans_unit = (org.dom4j.Element) transunits.get(t); org.dom4j.Element trans_unit_ignt = (org.dom4j.Element) transunits_ingt.get(t); trans_unit.addAttribute("gs4tr:editStatus", "leveraged"); org.dom4j.Element source = trans_unit.element("source"); org.dom4j.Element target = trans_unit.addElement("target"); trans_unit.elements().add(source.indexOf(source.getParent()) + 2, target.clone()); trans_unit.remove(target); target = trans_unit.element("target"); org.dom4j.Element target_ignt = trans_unit_ignt.addElement("target"); trans_unit_ignt.elements().add(source.indexOf(source.getParent()) + 2, target_ignt.clone()); trans_unit_ignt.remove(target_ignt); target_ignt = trans_unit_ignt.element("target"); if (merged_edited_idx.contains(Integer.valueOf(t))) { target.setText(((String) mergedsegtext.get(t)).replace("<", "<") .replace(">", ">").replace("&", "&").trim()); target_ignt.setText(((String) 
mergedsegtext.get(t)).replace("<", "<") .replace(">", ">").replace("&", "&").trim()); } else { target.setContent(trimContents((List) merged_trg_contents.get(t))); target_ignt.setContent(trimContents((List) merged_trg_contents.get(t))); if (!((String[]) statss.get(t))[0].equals("75")) { org.dom4j.Element source_ingt = trans_unit_ignt.element("source"); source_ingt.setContent(trimContents((List) merged_trg_contents.get(t))); } } String[] stats = (String[]) statss.get(t); if (stats[0].equals("1")) { trans_unit.addAttribute("gs4tr:locked", "true"); } target.addAttribute("gs4tr:score", stats[0]); target.addAttribute("state", stats[1]); target.addAttribute("state-qualifier", stats[2]); if (stats[0].equals("0")) { trans_unit.remove(target); } } } else { String trgtext = ""; if (((org.dom4j.Element) units.get(0)).element("trg_para") != null) { trgtext = ((org.dom4j.Element) units.get(0)).element("trg_para").elementText("text"); } String temp_tctype = ((org.dom4j.Element) units.get(0)).element("src_para") .attributeValue("tctype"); for (int j = 1; j < units.size(); j++) { org.dom4j.Element prev_unit = (org.dom4j.Element) units.get(j - 1); org.dom4j.Element unit = (org.dom4j.Element) units.get(j); String src_tctype = unit.element("src_para").attributeValue("tctype"); temp_tctype = temp_tctype.equals(src_tctype) ? 
temp_tctype : "MIX"; if (unit.element("trg_para") != null) { String Rtrim = prev_unit.element("src_para").attributeValue("righttrim"); String Ltrim = unit.element("src_para").attributeValue("lefttrim"); if ((Rtrim.equals("true")) || (Ltrim.equals("true"))) { trgtext = trgtext + " " + unit.element("trg_para").elementText("text"); } else { trgtext = trgtext + unit.element("trg_para").elementText("text"); } } } org.dom4j.Element txlf_group = (org.dom4j.Element) list_source.get(count); org.dom4j.Element trans_unit = txlf_group.element("trans-unit"); trans_unit.addAttribute("gs4tr:editStatus", "leveraged"); org.dom4j.Element source = trans_unit.element("source"); org.dom4j.Element target = trans_unit.addElement("target"); trans_unit.elements().add(source.indexOf(source.getParent()) + 2, target.clone()); trans_unit.remove(target); target = trans_unit.element("target"); int lb_cnt = 0; String surfix = trgtext; while (surfix.indexOf("<br> ") != -1) { lb_cnt++; int pos = surfix.indexOf("<br> "); String prefix = surfix.substring(0, pos); target.addText(prefix.replace("<", "<").replace(">", ">").replace("&", "&")); org.dom4j.Element x = target.addElement("x"); x.addAttribute("ctype", "lb"); x.addAttribute("id", Integer.toString(lb_cnt)); x.addAttribute("equiv-text", " "); surfix = surfix.substring(pos + 5, surfix.length()); } target.addText(surfix.replace("<", "<").replace(">", ">").replace("&", "&")); String[] stats = TrackChangeHelper.getTxlfTrgStatsFromTCType(temp_tctype, trgtext); target.addAttribute("gs4tr:score", stats[0]); target.addAttribute("state", stats[1]); target.addAttribute("state-qualifier", stats[2]); String[] s = new String[7]; s[0] = group.elementText("text").replace("<br> ", "↵<br>"); s[1] = trgtext.replace("<br> ", "↵<br>"); s[2] = stats[0]; s[3] = stats[2]; s[4] = temp_tctype; wcounter = new TradosWordCounter(locale, config); wcounter.countText(source.getText()); s[5] = Integer.toString(wcounter.getWordCount()); s[6] = ""; totalWC += 
wcounter.getWordCount(); reportStates.add(s); } count++; } } } root_src.element("file").addAttribute("gs4tr:wordcount", Integer.toString(totalWC)); fixTxlfTrgTags(document_src); this.populatedsourcetxlf = (this.sourcefile + ".txlf"); if (new File(this.populatedsourcetxlf).exists()) { new File(this.populatedsourcetxlf).delete(); } OutputStreamWriter writer = new OutputStreamWriter( new BufferedOutputStream(new FileOutputStream(this.populatedsourcetxlf)), "UTF8"); document_src.write(writer); writer.close(); removeBlankLinesAndNameSpace(this.populatedsourcetxlf); root_src_ingt.element("file").addAttribute("gs4tr:wordcount", Integer.toString(totalWC)); fixTxlfTrgTags(document_src_ingt); String ingtfile = this.sourcefile + ".ingt.txlf"; if (new File(ingtfile).exists()) { new File(ingtfile).delete(); } OutputStreamWriter writer_ingt = new OutputStreamWriter( new BufferedOutputStream(new FileOutputStream(ingtfile)), "UTF8"); document_src_ingt.write(writer_ingt); writer_ingt.close(); removeBlankLinesAndNameSpace(ingtfile); return reportStates; }
From source file:revaligner.service.FileAligner.java
public void fixTxlfTrgTags(org.dom4j.Document doc) throws Exception { org.dom4j.Element root = doc.getRootElement(); List<org.dom4j.Element> list_transunit = root.selectNodes("//*[name() = 'trans-unit']"); for (org.dom4j.Element e : list_transunit) { org.dom4j.Element source = e.element("source"); org.dom4j.Element target = e.element("target"); if ((target != null) && (!target.getText().equals(""))) { HashMap<String, List<Object>> src_map = new HashMap(); boolean tagnodeappear = false; boolean textnodeappear = false; boolean sourcetagsurroundtext = true; int maxid = 1; for (int i = 0; i < source.content().size(); i++) { org.dom4j.Node node = (org.dom4j.Node) source.content().get(i); if (node.getNodeType() == 1) { org.dom4j.Element tag = (org.dom4j.Element) node; maxid = Math.max(maxid, Integer.parseInt(tag.attributeValue("id"))); }//ww w. j av a 2s . c o m } maxid++; for (int i = 0; i < source.content().size(); i++) { org.dom4j.Node node = (org.dom4j.Node) source.content().get(i); if (node.getNodeType() == 1) { if (textnodeappear) { tagnodeappear = true; } org.dom4j.Element tag = (org.dom4j.Element) node; String key = ""; if ((tag.getName().equals("ex")) || (tag.getName().equals("ept"))) { key = tag.getName() + "#" + tag.attributeValue("rid"); } else if (tag.getName().equals("x")) { key = tag.getName() + "#" + tag.attributeValue("ctype"); } else { key = tag.getName(); } if (src_map.containsKey(key)) { ((List) src_map.get(key)).add(node.clone()); } else { List<Object> list = new ArrayList(); list.add(node.clone()); src_map.put(key, list); } } else { if (tagnodeappear) { sourcetagsurroundtext = false; } textnodeappear = true; } } HashMap<String, String> rid_map = new HashMap(); List trg_contents = new ArrayList(); boolean targetnotag = true; for (int i = 0; i < target.content().size(); i++) { org.dom4j.Node node = (org.dom4j.Node) target.content().get(i); if (node.getNodeType() == 1) { org.dom4j.Element tag = (org.dom4j.Element) node; String rid = 
tag.attributeValue("rid"); String key = ""; if ((tag.getName().equals("ph")) || (tag.getName().equals("bpt")) || (tag.getName().equals("bx"))) { key = tag.getName(); if (src_map.containsKey(key)) { List list = (List) src_map.get(key); trg_contents.add(list.get(0)); targetnotag = false; rid_map.put(rid, ((org.dom4j.Element) list.get(0)).attributeValue("rid")); list.remove(0); if (list.size() == 0) { src_map.remove(key); } } } else if (tag.getName().equals("x")) { key = tag.getName() + "#" + tag.attributeValue("ctype"); if (src_map.containsKey(key)) { List list = (List) src_map.get(key); trg_contents.add(list.get(0)); targetnotag = false; list.remove(0); if (list.size() == 0) { src_map.remove(key); } } else if ((tag.getName().equals("x")) && (tag.attributeValue("ctype").equals("x-tab"))) { org.dom4j.Element tab = DocumentHelper.createElement("x"); tab.addAttribute("ctype", "x-tab"); tab.addAttribute("id", Integer.toString(maxid)); maxid++; tab.addAttribute("equiv-text", " "); trg_contents.add(tab); } else if ((tag.getName().equals("x")) && (tag.attributeValue("ctype").equals("lb"))) { org.dom4j.Element lb = DocumentHelper.createElement("x"); lb.addAttribute("ctype", "lb"); lb.addAttribute("id", Integer.toString(maxid)); maxid++; lb.addAttribute("equiv-text", " "); trg_contents.add(lb); } } else if ((tag.getName().equals("ex")) || (tag.getName().equals("ept"))) { String mapped_rid = (String) rid_map.get(tag.attributeValue("rid")); key = tag.getName() + "#" + mapped_rid; if (src_map.containsKey(key)) { List list = (List) src_map.get(key); trg_contents.add(list.get(0)); rid_map.put(rid, ((org.dom4j.Element) list.get(0)).attributeValue("rid")); list.remove(0); if (list.size() == 0) { src_map.remove(key); } } } } else if (node.getNodeType() == 3) { String text = node.getText(); if (text.contains("<br> ")) { String key = "x#lb"; String[] ss = text.split("<br> "); for (int s = 0; s < ss.length; s++) { trg_contents.add(DocumentHelper.createText(ss[s])); if (s < ss.length - 
1) { if (src_map.containsKey(key)) { List list = (List) src_map.get(key); trg_contents.add(list.get(0)); targetnotag = false; list.remove(0); if (list.size() == 0) { src_map.remove(key); } } else { org.dom4j.Element lb = DocumentHelper.createElement("x"); lb.addAttribute("ctype", "lb"); lb.addAttribute("id", Integer.toString(maxid)); maxid++; lb.addAttribute("equiv-text", " "); trg_contents.add(lb); } } } } else if (text.contains("<br>")) { String key = "x#lb"; String[] ss = text.split("<br> "); for (int s = 0; s < ss.length; s++) { trg_contents.add(DocumentHelper.createText(ss[s])); if (s < ss.length - 1) { if (src_map.containsKey(key)) { List list = (List) src_map.get(key); trg_contents.add(list.get(0)); targetnotag = false; list.remove(0); if (list.size() == 0) { src_map.remove(key); } } else { org.dom4j.Element lb = DocumentHelper.createElement("x"); lb.addAttribute("ctype", "lb"); lb.addAttribute("id", Integer.toString(maxid)); maxid++; lb.addAttribute("equiv-text", " "); trg_contents.add(lb); } } } } else { trg_contents.add(node); } } else { trg_contents.add(node); } } while (trg_contents.size() > 0) { org.dom4j.Node nd = (org.dom4j.Node) trg_contents.get(0); if (nd.getNodeType() == 3) { if (nd.getText().trim().equals("")) { trg_contents.remove(0); } else { nd.setText(nd.getText().replaceAll("^(\\s)+", "")); break; } } else if ((nd.getNodeType() == 1) && (nd.getName().equals("x")) && (((org.dom4j.Element) nd).attributeValue("ctype").equals("lb"))) { trg_contents.remove(0); } else { if ((nd.getNodeType() != 1) || (!nd.getName().equals("x")) || (!((org.dom4j.Element) nd).attributeValue("ctype").equals("x-tab"))) { break; } trg_contents.remove(0); } } while (trg_contents.size() > 0) { org.dom4j.Node nd = (org.dom4j.Node) trg_contents.get(trg_contents.size() - 1); if (nd.getNodeType() == 3) { if (nd.getText().trim().equals("")) { trg_contents.remove(trg_contents.size() - 1); } else { nd.setText(nd.getText().replaceAll("(\\s)+$", "")); break; } } else if 
((nd.getNodeType() == 1) && (nd.getName().equals("x")) && (((org.dom4j.Element) nd).attributeValue("ctype").equals("lb"))) { trg_contents.remove(trg_contents.size() - 1); } else { if ((nd.getNodeType() != 1) || (!nd.getName().equals("x")) || (!((org.dom4j.Element) nd).attributeValue("ctype").equals("x-tab"))) { break; } trg_contents.remove(trg_contents.size() - 1); } } boolean isnotignoredsegment = true; if (trg_contents.size() == 1) { org.dom4j.Node node = (org.dom4j.Node) trg_contents.get(0); if ((node.getNodeType() == 3) && (node.getText().replaceFirst("", "").equals(""))) { isnotignoredsegment = false; } } if ((targetnotag) && (sourcetagsurroundtext) && (isnotignoredsegment)) { boolean isleadingtag = true; int index = 0; for (int i = 0; i < source.content().size(); i++) { org.dom4j.Node node = (org.dom4j.Node) source.content().get(i); if (node.getNodeType() == 1) { if (isleadingtag) { trg_contents.add(index, node); index++; } else { trg_contents.add(node); } } else { isleadingtag = false; index++; } } } target.setContent(trg_contents); } } }
From source file:ru.apertum.qsystem.client.forms.FBoardParams.java
License:Open Source License
/** * XML/*w ww . j a va2s. c o m*/ */ private void saveXML() { if (params != null) { if (Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_FRACTAL).size() > 0) { Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_FRACTAL).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, tfFRactal.getText()); } Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_RUNNING_TEXT).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, textFieldRunning.getText()); Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_FON_IMG).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, textFieldPict.getText()); // CDATA for (int i = 0; i < params.nodeCount(); i++) { final Node node = params.node(i); if (node.getNodeType() == Node.CDATA_SECTION_NODE) { params.remove(node); } } final String str = textAreaHtml.getText(); if (!"".equals(str)) { params.addCDATA(str); } Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_VIDEO_FILE).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, textFieldVideo.getText()); Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_FONT_SIZE).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, String.valueOf((Integer) spinnerFontSize.getValue())); Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_SPEED_TEXT).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, String.valueOf((Integer) spinnerSpeed.getValue())); Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_FONT_COLOR).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, textFieldFontColor.getText()); Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_SIMPLE_DATE).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, checkBoxDate.isSelected() ? "1" : "0"); Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_GRID_NEXT).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, checkBoxGridNext.isSelected() ? 
"1" : "0"); Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_GRID_NEXT_COLS).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, String.valueOf((Integer) spinnerGridNextCols.getValue())); Uses.elementsByAttr(params, Uses.TAG_BOARD_NAME, Uses.TAG_BOARD_GRID_NEXT_ROWS).get(0) .addAttribute(Uses.TAG_BOARD_VALUE, String.valueOf((Integer) spinnerGridNextRows.getValue())); } }
From source file:treesim.TreeSim.java
private static void preParentOrderFirstDOM(Node n) { if (n != null) { if (n.getParent() == null) { System.out.println("ROOT -- \"" + n.getName() + "\""); elementNodes.add(n.getName()); } else {//from w ww .j a va 2 s.c o m System.out.println("\"" + n.getParent().getName() + "\" -- \"" + n.getName() + "\""); elementNodes.add(n.getName()); } for (Object a : ((Element) n).attributes()) { System.out.println("\"" + n.getName() + "\" -- \"" + ((Attribute) a).getName() + " " + ((Attribute) a).getValue() + "\""); attributeParentNodes.add(n.getName()); attributeNodes.add(((Attribute) a).getName()); } if (!n.getText().trim().equals("")) { System.out.println("\"" + n.getName() + "\" -- \"" + n.getText().trim() + "\""); textNodeConnections.add(n.getName()); } for (Object o : n.selectNodes("child::*")) { if (n.getNodeType() == Node.ELEMENT_NODE) { preParentOrderFirstDOM((Node) o); } } } }
From source file:treesim.TreeSim.java
/**
 * Walks the second DOM tree depth-first (parent before children), printing one edge per line
 * and recording what it sees: every visited node name in {@code elementNodes2}, every
 * attribute in {@code attributeParentNodes2}/{@code attributeNodes2}, and the name of every
 * node with non-blank text in {@code textNodeConnections2}.
 *
 * <p>The element-type check now guards the {@code Element} cast and the recursion; it used
 * to sit inside the child loop, where it tested the parent again after the cast had already
 * been performed unconditionally.
 *
 * @param n node to start from; {@code null} is ignored
 */
private static void preParentOrderSecondDOM(Node n) {
    if (n == null) {
        return;
    }

    if (n.getParent() == null) {
        System.out.println("ROOT -- \"" + n.getName() + "\"");
    } else {
        System.out.println("\"" + n.getParent().getName() + "\" -- \"" + n.getName() + "\"");
    }
    elementNodes2.add(n.getName());

    boolean isElement = n.getNodeType() == Node.ELEMENT_NODE;
    if (isElement) {
        for (Object a : ((Element) n).attributes()) {
            Attribute attribute = (Attribute) a;
            System.out.println("\"" + n.getName() + "\" -- \"" + attribute.getName() + " "
                    + attribute.getValue() + "\"");
            attributeParentNodes2.add(n.getName());
            attributeNodes2.add(attribute.getName());
        }
    }

    String text = n.getText().trim();
    if (!text.equals("")) {
        System.out.println("\"" + n.getName() + "\" -- \"" + text + "\"");
        textNodeConnections2.add(n.getName());
    }

    if (isElement) {
        // "child::*" selects element children only.
        for (Object child : n.selectNodes("child::*")) {
            preParentOrderSecondDOM((Node) child);
        }
    }
}