Usage examples for the dom4j method org.dom4j.Element.getText()
String getText();
From source file:com.globalsight.smartbox.bussiness.process.Usecase02PostProcess.java
License:Apache License
/** * Convert xml to csv or txt file/* ww w.j av a 2 s.co m*/ * * @param format * @param originFile * @return * @throws Exception */ private File convertXMLToCSVTXT(String format, String targetFile, String outputFilePath) throws Exception { File xmlFile = new File(targetFile); File outputFile = new File(outputFilePath); SAXReader saxReader = new SAXReader(); Document document = saxReader.read(xmlFile); Element aElement = document.getRootElement(); String encoding = aElement.attributeValue("BomInfo"); FileOutputStream fos = new FileOutputStream(outputFilePath); FileUtil.writeBom(fos, encoding); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, encoding)); List<Element> rows = aElement.elements("row"); for (Element row : rows) { List<String> rowStr = new ArrayList<String>(); rowStr.add(row.elementText("sid")); rowStr.add(row.elementText("sourceLocaleName")); rowStr.add(row.elementText("sourceLocaleCode")); rowStr.add(row.elementText("unknown")); rowStr.add(row.elementText("translationSource")); rowStr.add(row.elementText("targetLocale")); rowStr.add(row.elementText("creationDate")); List<Element> segments = row.elements("segment"); for (Element element : segments) { rowStr.add(element.getText()); } StringBuffer sb = new StringBuffer(); if ("csv".equals(format)) { for (String str : rowStr) { sb.append("\"").append(str).append("\"").append(","); } } else { for (String str : rowStr) { sb.append(str).append("|"); } } sb.deleteCharAt(sb.length() - 1); bw.write(sb.toString()); bw.newLine(); } bw.close(); fos.close(); return outputFile; }
From source file:com.globalsight.smartbox.util.WebClientHelper.java
License:Apache License
/**
 * Gets the file profile information from the GS server and turns it into
 * {@code FileProfile} objects.
 *
 * <p>The XML returned by {@code ambassador.getFileProfileInfoEx(accessToken)} is parsed and
 * each {@code /fileProfileInfo/fileProfile} node is mapped to one {@code FileProfile}: its
 * {@code id}, {@code name}, every {@code fileExtensionInfo/fileExtension}, the
 * {@code localeInfo/sourceLocale} and every {@code localeInfo/targetLocale}.</p>
 *
 * @return the file profiles, in document order
 * @throws Exception if the server call or the XML parsing fails
 */
public static List<FileProfile> getFileProfileInfoFromGS() throws Exception {
    List<FileProfile> result = new ArrayList<FileProfile>();

    String profileXml = ambassador.getFileProfileInfoEx(accessToken);
    Document parsed = DocumentHelper.parseText(profileXml);
    List<Element> profileNodes = parsed.selectNodes("/fileProfileInfo/fileProfile");

    for (Element profileNode : profileNodes) {
        FileProfile profile = new FileProfile();

        String id = profileNode.selectSingleNode("id").getText();
        profile.setId(id);
        String name = profileNode.selectSingleNode("name").getText();
        profile.setName(name);

        List<Element> extensionNodes = profileNode.selectNodes("fileExtensionInfo/fileExtension");
        for (Element extensionNode : extensionNodes) {
            String extension = extensionNode.getText();
            profile.addFileExtension(extension);
        }

        profile.setSourceLocale(profileNode.selectSingleNode("localeInfo/sourceLocale").getText());

        List<Element> targetNodes = profileNode.selectNodes("localeInfo/targetLocale");
        for (Element targetNode : targetNodes) {
            String target = targetNode.getText();
            profile.addTargetLocale(target);
        }

        result.add(profile);
    }
    return result;
}
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * Gets the entry's concept ID./*from w ww.ja va2s. c o m*/ * @return 0 if the entry has no ID yet, else a positive number. */ static public long getConceptId(Entry p_entry) throws TermbaseException { Document dom = p_entry.getDom(); Element root = dom.getRootElement(); Element concept = root.element("concept"); if (concept == null || concept.getText().length() == 0) { return 0; } else { return Long.parseLong(concept.getText()); } }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/**
 * <p>Checks that a TBX entry is consistent with the termbase definition.
 * TO BE COMPLETED: for now the only check is that the entry holds at least
 * one non-empty term.</p>
 *
 * <p>The entry is first passed to {@code pruneEntry}. Then the {@code langSet}
 * children of the root element are scanned: inside an {@code ntig} the terms
 * are looked up under {@code termGrp/term}, inside a {@code tig} directly under
 * {@code term}. The scan stops at the first term with non-empty text. An
 * {@code ntig} without such a term is reported through
 * {@code invalidEntry("no terms defined")}; if the whole scan finds nothing,
 * {@code invalidEntry("no languages defined")} is called.</p>
 *
 * @param p_entry      the entry to prune and check
 * @param p_definition not used by this method
 * @throws TermbaseException rethrown unchanged when raised during the check; any other
 *                           exception is passed to {@code invalidEntry} by message
 */
public static void normalizeTbxEntry(Entry p_entry, Definition p_definition) throws TermbaseException {
    try {
        // Remove empty fields before looking for a term.
        pruneEntry(p_entry);

        Element termEntry = p_entry.getDom().getRootElement();
        boolean termFound = false;

        scan:
        for (Iterator langSets = termEntry.elementIterator(); langSets.hasNext();) {
            Element langSet = (Element) langSets.next();
            if (!langSet.getName().equals("langSet")) {
                continue;
            }

            for (Iterator groups = langSet.elementIterator(); groups.hasNext();) {
                Element group = (Element) groups.next();

                if (group.getName().equals("ntig")) {
                    // ntig: terms live one level deeper, inside termGrp.
                    for (Iterator parts = group.elementIterator(); parts.hasNext();) {
                        Element part = (Element) parts.next();
                        if (!part.getName().equals("termGrp")) {
                            continue;
                        }
                        for (Iterator terms = part.elementIterator(); terms.hasNext();) {
                            Element candidate = (Element) terms.next();
                            if (candidate.getName().equals("term")) {
                                String text = candidate.getText();
                                if (text != null && text.length() > 0) {
                                    termFound = true;
                                    break scan;
                                }
                            }
                        }
                    }
                    // Only reached when this ntig contained no usable term.
                    invalidEntry("no terms defined");
                } else if (group.getName().equals("tig")) {
                    // tig: terms are direct children.
                    for (Iterator terms = group.elementIterator(); terms.hasNext();) {
                        Element candidate = (Element) terms.next();
                        if (candidate.getName().equals("term")) {
                            String text = candidate.getText();
                            if (text != null && text.length() > 0) {
                                termFound = true;
                                break scan;
                            }
                        }
                    }
                }
            }
        }

        if (!termFound) {
            invalidEntry("no languages defined");
        }
    } catch (TermbaseException e) {
        throw e;
    } catch (Exception e) {
        invalidEntry(e.getMessage());
    }
}
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * <p>Checks that an entry is consistent with the termbase * definition (at least one term per language, languages defined * in termbase, required fields present, etc).</p> * * <p>For now, check that we have at least one term. General entry * structure validation should have been done by the XML Parser * that parsed the entry into DOM - using the EntryStructure * schema. We also remove empty fields and groups.</p> * * TO BE COMPLETED//from w w w .j a v a2 s .c om */ static public void normalizeEntry(Entry p_entry, Definition p_definition) throws TermbaseException { try { boolean ok = false; // Remove empty fields and then check if there's at least // one term in the entry. pruneEntry(p_entry); Document dom = p_entry.getDom(); Element root = dom.getRootElement(); // CONCEPT-LEVEL for (Iterator it = root.elementIterator(); it.hasNext();) { Element cnode = (Element) it.next(); if (cnode.getName().equals("languageGrp")) { // LANGUAGE-LEVEL for (Iterator it2 = cnode.elementIterator(); it2.hasNext();) { Element lnode = (Element) it2.next(); if (lnode.getName().equals("termGrp")) { // TERM-LEVEL for (Iterator it3 = lnode.elementIterator(); it3.hasNext();) { Element tnode = (Element) it3.next(); if (tnode.getName().equals("term")) { String text = tnode.getText(); if (text != null && text.length() > 0) { ok = true; break; } } } } if (ok) break; } if (!ok) { invalidEntry("no terms defined"); } } if (ok) break; } if (!ok) { invalidEntry("no languages defined"); } } catch (TermbaseException e) { throw e; } catch (Exception e) { invalidEntry(e.getMessage()); } }
From source file:com.globalsight.terminology.EntryUtils.java
License:Apache License
/** * <p>Recursively prunes empty fields and groups from the given entry. * The entry is destructively modified.</p> * * <p>A depth-first traversal first removes empty leaf nodes, and * then groups that are empty or not fully filled.</p> * * <p>Example: a <descripGrp> must contain at least one <descrip> * child. A <languageGrp> must contain at least one <language> * and one <termGrp> child (2 children minimum).</p> * * <p>As of 6.2, non-relevant whitespace nodes are also removed.</p> * <p>As of 6.3, admissible empty HTML tags are not pruned: IMG, HR, BR.</p> *///w ww . ja v a 2s .c o m static private boolean pruneEmptyFields(Element p_node) { boolean dirty = false; if (!p_node.hasContent()) { return dirty; } // Cannot iterate child elements with node.elementIterator() // because that doesn't implement the remove() method. for (Iterator it = p_node.content().iterator(); it.hasNext();) { Node temp = (Node) it.next(); // Only work on child elements. if (temp.getNodeType() != Node.ELEMENT_NODE) { continue; } Element node = (Element) temp; // Depth-first recursion. dirty |= pruneEmptyFields(node); // Sat Jan 15 02:17:38 2005 CvdL Need to allow empty HTML tags. String name = node.getName().toLowerCase(); if (name.equals("language") || name.equals("img") || name.equals("hr") || name.equals("br")) { continue; } // Leaf nodes if (node.isTextOnly()) { String value = node.getText(); if (value == null || value.trim().length() == 0) { // prune empty leaf nodes it.remove(); dirty = true; } } else { // Group nodes int childCount = node.elements().size(); if (childCount == 0 || (node.getName().equals("languageGrp") && childCount < 2)) { // prune empty groups it.remove(); dirty = true; } } } return dirty; }
From source file:com.globalsight.terminology.exporter.MtfWriter.java
License:Apache License
/** * Converts a GlobalSight concept group to a MultiTerm iX concept * group. Differences://from w w w . j a va 2 s . c o m * * - concept level <descrip type="entryClass|status"> --> * <system type="entryClass|status"> * * - <language name="English" locale="en_US" /> --> * <language type="English" lang="EN" /> * * - <noteGrp><note> --> * <descripGrp><descrip type="note"></descripGrp> * * - <note> --> (should not be produced but could be in old data) * <descripGrp><descrip type="note"></descripGrp> * * - <sourceGrp><source></sourceGrp> --> * <descripGrp><descrip type="source"></descripGrp> * * - descripGrp is not recursive */ private Document convertToMtf(Document p_elem) { List nodes; Node node; Element root = p_elem.getRootElement(); Element elem; Iterator it; ListIterator lit; if (false && CATEGORY.isDebugEnabled()) { CATEGORY.debug("gt2mtf init: " + p_elem.asXML()); } // rewrite <descrip type=entryClass> (only one on concept level) nodes = root.selectNodes("descrip[@type='entryClass']"); for (it = nodes.iterator(); it.hasNext();) { elem = (Element) it.next(); Element parent = elem.getParent(); parent.remove(elem); parent.addElement("system").addAttribute("type", "entryClass").addText(elem.getText()); } // rewrite <descrip type=status> (?? used in MTF?) 
nodes = root.selectNodes("descrip[@type='status']"); for (it = nodes.iterator(); it.hasNext();) { elem = (Element) it.next(); Element parent = elem.getParent(); parent.remove(elem); parent.addElement("system").addAttribute("type", "status").addText(elem.getText()); } // rewrite <noteGrp> while (true) { // refresh the node list, we're rewriting the structure node = root.selectSingleNode("//noteGrp"); if (node == null) { break; } elem = (Element) node; Element parent = elem.getParent(); parent.remove(elem); Element newNote = parent.addElement("descripGrp"); Element note = null; // copy all child nodes but remember the <note> for (lit = elem.elements().listIterator(); lit.hasNext();) { Element child = (Element) lit.next(); if (child.getName().equals("note")) { note = child; } else { lit.remove(); newNote.add(child); } } // create new <descrip type="note"> with note's value newNote.addElement("descrip").addAttribute("type", "note").addText(note.getText()); } // rewrite single <note>, if any are left in the entry while (true) { // refresh the node list, we're rewriting the structure node = root.selectSingleNode("//note"); if (node == null) { break; } Element note = (Element) node; Element parent = note.getParent(); parent.remove(note); Element newNote = parent.addElement("descripGrp"); newNote.addElement("descrip").addAttribute("type", "note").addText(note.getText()); } // rewrite <sourceGrp> while (true) { // refresh the node list, we're rewriting the structure node = root.selectSingleNode("//sourceGrp"); if (node == null) { break; } elem = (Element) node; Element parent = elem.getParent(); parent.remove(elem); Element newSource = parent.addElement("descripGrp"); Element source = null; // copy all child nodes but remember the <source> for (lit = elem.elements().listIterator(); lit.hasNext();) { Element child = (Element) lit.next(); if (child.getName().equals("source")) { source = child; } else { lit.remove(); newSource.add(child); } } // create new <descrip 
type="source"> with source's value newSource.addElement("descrip").addAttribute("type", "source").addText(source.getText()); } // rewrite <language> nodes = root.selectNodes("//languageGrp/language"); for (it = nodes.iterator(); it.hasNext();) { elem = (Element) it.next(); Attribute nameAttr = elem.attribute("name"); Attribute langAttr = elem.attribute("locale"); String langName = nameAttr.getValue(); String langLocale = langAttr.getValue(); // locales in MTF consist of 2 letter codes (uppercase). langLocale = langLocale.substring(0, 2).toUpperCase(); elem.remove(nameAttr); elem.remove(langAttr); elem.addAttribute("type", langName); elem.addAttribute("lang", langLocale); } if (false && CATEGORY.isDebugEnabled()) { CATEGORY.debug("gt2mtf done: " + p_elem.asXML()); } return p_elem; }
From source file:com.globalsight.terminology.importer.MtfReaderThread.java
License:Apache License
/** * Converts a MultiTerm MTF concept group to a GlobalSight concept * group. Differences:/*from w w w. j a v a 2s . com*/ * * - <system type="entryClass|status"> --> * <descrip type="entryClass|status"> * * - <language type="English" lang="EN" /> --> * <language name="English" locale="en_US" /> * * - <descripGrp><descrip type="note"> --> * <noteGrp><note> * * - <descripGrp><descrip type="source"> --> * <sourceGrp><source> * * - descripGrp is recursive, must map to noteGrps or delete. * * - remove empty descripGrp <descrip type="Graphic"/> */ private Element convertMtf(Element p_elem) { List nodes; Node node; Element elem; Iterator it; ListIterator lit; // fix <system> nodes = p_elem.elements("system"); for (it = nodes.iterator(); it.hasNext();) { elem = (Element) it.next(); p_elem.remove(elem); p_elem.addElement("descrip").addAttribute("type", elem.attributeValue("type")).addText(elem.getText()); } // fix Graphic; we cannot handle them, so remove them nodes = p_elem.selectNodes("descripGrp[descrip/@type='Graphic']"); for (it = nodes.iterator(); it.hasNext();) { elem = (Element) it.next(); p_elem.remove(elem); } // convert <descripGrp><descrip type="note"> to noteGrp nodes = p_elem.selectNodes(".//descripGrp[descrip/@type='note']"); for (it = nodes.iterator(); it.hasNext();) { elem = (Element) it.next(); convertToNoteGrp(elem); } // convert <descripGrp><descrip type="source"> to sourceGrp nodes = p_elem.selectNodes(".//descripGrp[descrip/@type='source']"); for (it = nodes.iterator(); it.hasNext();) { elem = (Element) it.next(); convertToSourceGrp(elem); } // Convert recursive descripGrps to noteGrps if possible. convertRecursiveDescripGrps(p_elem, ".//conceptGrp/descripGrp"); convertRecursiveDescripGrps(p_elem, ".//languageGrp/descripGrp"); convertRecursiveDescripGrps(p_elem, ".//termGrp/descripGrp"); // Remove the recursive descripGrps that are left over. // (In case there are doubly recursive descrips and stuff.) 
removeRecursiveDescripGrps(p_elem); // fix <language> nodes = p_elem.selectNodes("languageGrp/language"); for (it = nodes.iterator(); it.hasNext();) { elem = (Element) it.next(); Attribute nameAttr = elem.attribute("type"); Attribute langAttr = elem.attribute("lang"); String langName = nameAttr.getValue(); String langLocale = langAttr.getValue(); // locales in entries consist of 2 letter codes. langLocale = langLocale.substring(0, 2).toLowerCase(); elem.remove(nameAttr); elem.remove(langAttr); elem.addAttribute("name", langName); elem.addAttribute("locale", langLocale); } return p_elem; }
From source file:com.globalsight.terminology.searchreplace.TbMaintance.java
License:Apache License
private void searchNode(Element node, ArrayList array, long conceptId, long levelId) { String field = node.getText(); String searchText = rp.getSearchText(); String fieldType = rp.getSearchType(); if (!field.isEmpty()) { //String name = node.getName(); boolean flag = false; if (rp.isCaseInsensitive()) { if (rp.isWholeWord()) { if (field.equals(searchText)) { flag = true;//from www.j a v a 2 s. co m } } else { if (field.indexOf(searchText) > -1) { flag = true; } } } else { if (rp.isWholeWord()) { if (field.toLowerCase().equals(searchText.toLowerCase())) { flag = true; } } else { if (field.toLowerCase().indexOf(searchText.toLowerCase()) > -1) { flag = true; } } } if (flag) { boolean judge = false; String typeValue = node.attributeValue("type"); if (typeValue == null) { //if attribute type is null, get the node name to judge String nodeName = node.getName(); typeValue = nodeName; } if (fieldType == null || fieldType.trim().equals("")) { //if no select field type, select all judge = true; } else { if (typeValue != null) { //if attribute type is not null, judge the type if (typeValue.toLowerCase().trim().indexOf(fieldType.toLowerCase().trim()) > -1) { judge = true; } } } if (judge) { SearchResult p_result = new SearchResult(); p_result.setConceptId(conceptId); p_result.setLevelId(levelId); p_result.setFiled(field); p_result.setType(typeValue); array.add(p_result); } } } else { List children = node.elements(); for (int i = 0, max = children.size(); i < max; i++) { Element child = (Element) children.get(i); searchNode(child, array, conceptId, levelId); } } }
From source file:com.globalsight.terminology.searchreplace.TbMaintance.java
License:Apache License
/**
 * Walks {@code node} and sets the text of matching nodes to {@code replaceText}.
 *
 * <p>A node with non-empty text is a match when that text contains
 * {@code oldFieldText}; its whole text is then replaced and its children are not
 * visited. A node with empty text is not changed itself; its child elements are
 * processed recursively.</p>
 *
 * @param node         element to inspect
 * @param oldFieldText text to look for
 * @param replaceText  new text for a matching node
 */
private void doNodeVistor(Element node, String oldFieldText, String replaceText) {
    String text = node.getText();

    if (text.isEmpty()) {
        for (Object child : node.elements()) {
            doNodeVistor((Element) child, oldFieldText, replaceText);
        }
        return;
    }

    if (text.contains(oldFieldText)) {
        node.setText(replaceText);
    }
}