List of usage examples for org.dom4j.ElementHandler
ElementHandler
From source file:com.globalsight.terminology.util.MtfAnalyzer.java
License:Apache License
public void analyze(String p_url) throws Exception { m_entryCount = 0;/* ww w.j a v a2 s . c om*/ SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); System.err.println("Analyzing document: " + p_url); // enable element complete notifications to conserve memory reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; if (m_entryCount % 200 == 0) { log("Entry " + m_entryCount); } } public void onEnd(ElementPath path) { Element element = path.getCurrent(); // prune the current element to reduce memory element.detach(); element = null; } }); Document document = reader.read(p_url); log("Total entries: " + m_entryCount); // all done }
From source file:com.globalsight.terminology.util.MtfSplitter.java
License:Apache License
public void split(String p_url, String p_numEntries) throws Exception { final int maxEntries = Integer.parseInt(p_numEntries); final String baseName = getBaseName(p_url); final String extension = getExtension(p_url); m_entryCount = 0;/*from www.j a va 2s. com*/ SAXReader reader = new SAXReader(); reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser"); log("Splitting document `" + p_url + "'"); startFile(baseName, extension); // enable element complete notifications to conserve memory reader.addHandler("/mtf/conceptGrp", new ElementHandler() { public void onStart(ElementPath path) { ++m_entryCount; if (m_entryCount % maxEntries == 0) { try { closeFile(); startFile(baseName, extension); } catch (Exception ex) { log(ex.toString()); System.exit(1); } } } public void onEnd(ElementPath path) { Element element = path.getCurrent(); writeEntry(element.asXML()); // prune the current element to reduce memory element.detach(); element = null; } }); Document document = reader.read(p_url); closeFile(); // all done }
From source file:com.globalsight.webservices.Ambassador.java
License:Apache License
/** * Updates a tu in database./*from w w w .j av a 2 s .c om*/ * * @param accessToken * To judge caller has logon or not, can not be null. you can get * it by calling method <code>login(username, password)</code>. * @param tmName * TM name, will used to get tm id. * @param companyName * company name, will used to get tm id. * @param tmx * A tmx formate string inlcluding all tu information. * @return "true" if succeed * @throws WebServiceException */ public String editTu(String accessToken, String tmName, String companyName, String tmx) throws WebServiceException { try { Assert.assertNotEmpty(accessToken, "access token"); Assert.assertNotEmpty(tmx, "tmx format"); } catch (Exception e) { logger.error(e.getMessage(), e); throw new WebServiceException(e.getMessage()); } checkAccess(accessToken, "editEntry"); checkPermission(accessToken, Permission.TM_EDIT_ENTRY); Company company = getCompanyByName(companyName); if (company == null) { throw new WebServiceException("Can not find the company with name (" + companyName + ")"); } final ProjectTM ptm = getProjectTm(tmName, company.getIdAsLong()); if (ptm == null) { throw new WebServiceException( "Can not find the tm with tm name (" + tmName + ") and company name (" + companyName + ")"); } SAXReader reader = new SAXReader(); ElementHandler handler = new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element element = path.getCurrent(); element.detach(); try { normalizeTu(element); validateTu(element); if (ptm.getTm3Id() == null) { editTm2Tu(element); } else { editTm3Tu(element, ptm); } } catch (Throwable ex) { logger.error(ex.getMessage(), ex); throw new ThreadDeath(); } } }; reader.addHandler("/tu", handler); WebServicesLog.Start activityStart = null; try { String loggedUserName = this.getUsernameFromSession(accessToken); Map<Object, Object> activityArgs = new HashMap<Object, Object>(); activityArgs.put("loggedUserName", loggedUserName); activityStart = 
WebServicesLog.start(Ambassador.class, "editTu(accessToken,tmx)", activityArgs); reader.read(new StringReader(tmx)); } catch (DocumentException e) { logger.error(e.getMessage(), e); throw new WebServiceException(e.getMessage()); } finally { if (activityStart != null) { activityStart.end(); } } return "true"; }
From source file:com.nokia.config.SAXConfigParser.java
License:Open Source License
/** * Constructor/*from www. jav a 2 s . c o m*/ * @return list of available configurations that can be built. */ public String getConfigs() { File file = new File(sysdefFile); SAXReader reader = new SAXReader(); reader.addHandler("/SystemDefinition/build/target", new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element row = path.getCurrent(); Iterator itr = row.attributeIterator(); while (itr.hasNext()) { Attribute child = (Attribute) itr.next(); String attrName = child.getQualifiedName(); if (attrName.equals("name")) { configs += (String) child.getValue() + ","; } } row.detach(); } }); try { Document doc = reader.read(file); } catch (Exception e) { e.printStackTrace(); } return configs; }
From source file:com.nokia.helium.sbs.SAXSysdefParser.java
License:Open Source License
/**
 * Parses {@code sysdefFile} and fills {@code layers} with the value of every
 * {@code name} attribute found on elements matching
 * {@code /SystemDefinition/systemModel/<nodeToGet>}.
 *
 * <p>{@code layers} is replaced by a fresh list on every call. A
 * {@code DocumentException} raised while parsing is printed to standard
 * error and otherwise ignored.
 *
 * @param nodeToGet element name appended to the handler path
 */
public void parseConfig(String nodeToGet) {
    layers = new ArrayList<String>();

    String handlerPath = "/SystemDefinition/systemModel/" + nodeToGet;
    ElementHandler nodeHandler = new ElementHandler() {
        public void onStart(ElementPath nodePath) {
        }

        public void onEnd(ElementPath nodePath) {
            Element node = nodePath.getCurrent();

            for (Iterator attrs = node.attributeIterator(); attrs.hasNext();) {
                Attribute attr = (Attribute) attrs.next();
                if (attr.getQualifiedName().equals("name")) {
                    layers.add(attr.getValue());
                }
            }

            // The node has been handled; remove it from the tree.
            node.detach();
        }
    };

    SAXReader saxReader = new SAXReader();
    saxReader.addHandler(handlerPath, nodeHandler);

    try {
        saxReader.read(sysdefFile);
    } catch (DocumentException e) {
        e.printStackTrace();
    }
}
From source file:com.panet.imeta.trans.steps.getxmldata.GetXMLData.java
License:Open Source License
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl) throws KettleException { try {/*from w w w .j av a 2s . co m*/ SAXReader reader = new SAXReader(); data.stopPruning = false; // Validate XML against specified schema? if (meta.isValidating()) { reader.setValidation(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); } // Ignore comments? if (meta.isIgnoreComments()) reader.setIgnoreComments(true); if (data.prunePath != null) { // when pruning is on: reader.read() below will wait until all // is processed in the handler if (log.isDetailed()) logDetailed(Messages.getString("GetXMLData.Log.StreamingMode.Activated")); reader.addHandler(data.prunePath, new ElementHandler() { public void onStart(ElementPath path) { // do nothing here... } public void onEnd(ElementPath path) { if (isStopped()) { // when a large file is processed and it should be // stopped it is still reading the hole thing // the only solution I see is to prune / detach the // document and this will lead into a // NPE or other errors depending on the parsing // location - this will be treated in the catch part // below // any better idea is welcome if (log.isBasic()) logBasic(Messages.getString("GetXMLData.Log.StreamingMode.Stopped")); data.stopPruning = true; path.getCurrent().getDocument().detach(); // trick // to // stop // reader return; } // process a ROW element if (log.isDebug()) logDebug(Messages.getString("GetXMLData.Log.StreamingMode.StartProcessing")); Element row = path.getCurrent(); try { processStreaming(row.getDocument()); } catch (Exception e) { // catch the KettleException or others and forward // to caller, e.g. 
when applyXPath() has a problem throw new RuntimeException(e); } // prune the tree row.detach(); if (log.isDebug()) logDebug(Messages.getString("GetXMLData.Log.StreamingMode.EndProcessing")); } }); } if (IsInXMLField) { // read string to parse data.document = reader.read(new StringReader(StringXML)); } else if (readurl) { // read url as source data.document = reader.read(new URL(StringXML)); } else { // get encoding. By default UTF-8 String encoding = "UTF-8"; if (!Const.isEmpty(meta.getEncoding())) encoding = meta.getEncoding(); data.document = reader.read(KettleVFS.getInputStream(file), encoding); } if (meta.isNamespaceAware()) prepareNSMap(data.document.getRootElement()); } catch (Exception e) { if (data.stopPruning) { // ignore error when pruning return false; } else { throw new KettleException(e); } } return true; }
From source file:galign.helpers.tmx.TmxFile.java
License:Apache License
/** * Reads and validates a TMX XML string. */// www. j a v a2s . c om protected void init(SAXReader p_reader, InputSource p_input) throws org.dom4j.DocumentException { SAXReader reader = p_reader; // enable element complete notifications to conserve memory reader.addHandler("/tmx", new ElementHandler() { final public void onStart(ElementPath path) { Element element = path.getCurrent(); m_tmxVersion = element.attributeValue("version"); } final public void onEnd(ElementPath path) { } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/header", new ElementHandler() { final public void onStart(ElementPath path) { } final public void onEnd(ElementPath path) { Element element = path.getCurrent(); m_header = new TmxHeader(element); m_header.setTmxVersion(m_tmxVersion); // prune the current element to reduce memory element.detach(); element = null; } }); // enable element complete notifications to conserve memory reader.addHandler("/tmx/body/tu", new ElementHandler() { final public void onStart(ElementPath path) { } final public void onEnd(ElementPath path) { Element element = path.getCurrent(); addTu(new Tu(element)); // prune the current element to reduce memory element.detach(); element = null; } }); Document document = reader.read(p_input); // all done. }
From source file:musite.io.xml.PredictionResultXMLReader.java
License:Open Source License
public PredictionResult read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }// w ww. j av a 2s .c o m final PredictionResult result = data == null ? new PredictionResultImpl() : data; ProteinsXMLReader proteinsReader = ProteinsXMLReader.createReader(result, false); proteinsReader.setRoot(root); ProteinResidueAnnotationReader annReader = new ProteinResidueAnnotationReader(); annReader.putAnnotationFieldReader(musite.prediction.PredictionResult.ANNOTATION_FIELD_SCORE, SimpleFieldXMLReader.createDoubleCollectionReader()); proteinsReader.putProteinFieldReader(musite.ResidueAnnotationUtil.RESIDUE_ANNOTATION, annReader); String path = "/model-list/model"; if (root != null) path = "/" + root + path; proteinsReader.addSaxReaderHandler(path, new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { Element elem = path.getCurrent(); String name = StringEscapeUtils.unescapeXml(elem.attributeValue("name")); PTM ptm = null; String strPTM = StringEscapeUtils.unescapeXml(elem.elementText("ptm")); if (strPTM != null) ptm = PTM.valueOf(strPTM); Set<AminoAcid> aas = null; String strAAs = StringEscapeUtils.unescapeXml(elem.elementText("amino-acids")); if (strAAs != null) { String[] strs = strAAs.replaceAll("\n", "").split(";"); int n = strs.length; aas = new HashSet(n); for (int i = 0; i < n; i++) { String str = strs[i].trim(); aas.add(AminoAcid.valueOf(str)); } } SpecificityEstimatorImpl si = null; String strSI = StringEscapeUtils.unescapeXml(elem.elementText("spec-estimate-data")); if (strSI != null) { String[] strs = strSI.replaceAll("\n", "").split(";"); List<Double> train = new ArrayList(); for (String str : strs) { train.add(Double.valueOf(str.trim())); } si = new SpecificityEstimatorImpl(train); } String comment = StringEscapeUtils.unescapeXml(elem.elementText("comment")); if (comment != null) comment = comment.replaceAll("%EOL%", "\n"); PredictionModelImpl model = new 
PredictionModelImpl.Builder().name(name).ptm(ptm).aminoAcids(aas) .specEstimator(si).comment(comment).build(); result.addModel(model); // prune the tree elem.detach(); } }); proteinsReader.read(is); return result; }
From source file:musite.io.xml.ProteinsXMLReader.java
License:Open Source License
public ProteinsXMLReader(Proteins proteins) { this.data = proteins; nullData = proteins == null;//w w w.j av a2 s . c om proteinFieldReaders = new HashMap(); fieldFilter = null; saxReaderHandler = new HashMap(); String path = "/protein-list/protein"; if (root != null) path = "/" + root + path; addSaxReaderHandler(path, new ElementHandler() { public void onStart(ElementPath path) { } public void onEnd(ElementPath path) { ProteinImpl protein = new ProteinImpl(); Element elem = path.getCurrent(); Iterator<Element> itr = elem.elementIterator(); while (itr.hasNext()) { Element field = (Element) itr.next(); String name = field.getQualifiedName(); if (fieldFilter != null && fieldFilterInclude != fieldFilter.contains(name)) continue; Object obj; Reader fieldReader = proteinFieldReaders.get(name); if (fieldReader != null) { try { String text = nodeContentToString(field);//field.getTextTrim(); InputStream bais = StringUtil.toStream(text); obj = fieldReader.read(bais); bais.close(); } catch (IOException e) { e.printStackTrace(); continue; } } else { obj = StringEscapeUtils.unescapeXml(field.getTextTrim()); } protein.putInfo(name, obj); } //System.out.println(protein.getAccession()); if (proteinFilter == null || proteinFilter.filter(protein)) data.addProtein(protein); int count = data.proteinCount(); if (count % 1000 == 0) System.out.println(count); // prune the tree elem.detach(); } }); }
From source file:musite.io.xml.UniProtXMLReader.java
License:Open Source License
public Proteins read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }// w ww . j a v a2 s. c o m final Proteins result = data == null ? new ProteinsImpl() : data; SAXReader saxReader = new SAXReader(); final StringBuilder acc = new StringBuilder(30); final StringBuilder name = new StringBuilder(30); final StringBuilder fullName = new StringBuilder(200); final StringBuilder org = new StringBuilder(30); final StringBuilder seq = new StringBuilder(2000); final List<List> sites = new ArrayList(4); // location, ptm, enzyme, annotation final Set<String> accs = new HashSet(); // entry saxReader.addHandler("/uniprot/entry", new ElementHandler() { public void onStart(ElementPath path) { acc.setLength(0); fullName.setLength(0); seq.setLength(0); org.setLength(0); name.setLength(0); sites.clear(); accs.clear(); } public void onEnd(ElementPath path) { // process a element if (org.length() > 0 && (organismFilter == null || organismFilter.contains(org.toString())) && acc.length() > 0 && seq.length() > 0) { String accession = acc.toString(); String sequence = seq.toString(); ProteinImpl protein = new ProteinImpl(acc.toString(), sequence, name.length() == 0 ? null : name.toString(), fullName.length() == 0 ? null : fullName.toString(), org.length() == 0 ? 
null : org.toString()); result.addProtein(protein); for (List l : sites) { Integer site = (Integer) l.get(0); PTM ptm = (PTM) l.get(1); String enzyme = (String) l.get(2); if (enzyme != null && enzyme.equalsIgnoreCase("autocatalysis")) { enzyme = name.toString(); } Map ann = (Map) l.get(3); try { PTMAnnotationUtil.annotate(protein, site, ptm, enzyme, ann); } catch (Exception e) { e.printStackTrace(); } } if (keepAllIds) { for (String ac : accs) { mapIdMainId.put(ac, accession); } if (!accs.isEmpty()) protein.putInfo("other-accessions", new HashSet(accs)); } //System.out.println(accession); } // prune the tree Element row = path.getCurrent(); row.detach(); } }); // accession saxReader.addHandler("/uniprot/entry/accession", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (acc.length() == 0) { Element el = path.getCurrent(); acc.append(el.getText()); // if (keepAllIds) { // accs.add(acc.toString()); // } } else { if (keepAllIds) { accs.add(path.getCurrent().getText()); } } } }); // name saxReader.addHandler("/uniprot/entry/name", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (name.length() > 0) return; Element el = path.getCurrent(); name.append(el.getText()); } }); // full name saxReader.addHandler("/uniprot/entry/protein/recommendedName/fullName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (fullName.length() > 0) return; Element el = path.getCurrent(); fullName.append(el.getTextTrim()); } }); saxReader.addHandler("/uniprot/entry/organism/name", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (org.length() > 0) return; Element el = path.getCurrent(); String attr = el.attributeValue("type"); if (attr == null || !attr.equalsIgnoreCase("scientific")) { return; } 
org.append(el.getText()); } }); saxReader.addHandler("/uniprot/entry/sequence", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (seq.length() > 0) return; Element el = path.getCurrent(); seq.append(el.getText().replaceAll("\\p{Space}", "")); } }); saxReader.addHandler("/uniprot/entry/feature", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element el = path.getCurrent(); String type = el.attributeValue("type"); if (type == null) return; PTM ptm = null; String enzyme = null; String description = null; String keyword = null; if (UNIPROT_TYPES.contains(type.toLowerCase())) { description = el.attributeValue("description"); if (description == null) return; String[] descs = description.split("; "); for (String desc : descs) { PTM tmp = PTM.ofKeyword(desc); if (tmp != null) { ptm = tmp; keyword = desc; } else if (desc.startsWith("by ")) { enzyme = desc.substring(3); } } } // else if (type.equalsIgnoreCase("glycosylation site")) { // description = el.attributeValue("description"); // ptm = PTM.GLYCOSYLATION; // } // else if (type.equalsIgnoreCase()) { // description = el.attributeValue("description"); // String[] descs = description.split("; "); // for (String desc : descs) { // PTM tmp = PTM.ofKeyword(desc); // if (tmp != null) { // ptm = tmp; // keyword = desc; // } else if (desc.startsWith("by ")) { // enzyme = desc.substring(3); // } // } // } if (ptm == null || (ptmFilter != null && !ptmFilter.contains(ptm))) return; String status = el.attributeValue("status"); if (status != null) { if (!includeBySimilarity && status.equalsIgnoreCase("By similarity")) return; if (!includeProbable && status.equalsIgnoreCase("Probable")) return; if (!includePotential && status.equalsIgnoreCase("Potential")) return; } int site = -1; List<Element> locs = el.elements("location"); for (Element loc : locs) { List<Element> poss = 
loc.elements("position"); for (Element pos : poss) { String str = pos.attributeValue("position"); if (str == null) continue; try { site = Integer.parseInt(str) - 1; //start from 0 } catch (NumberFormatException e) { continue; } } } if (site != -1) { List l = new ArrayList(); l.add(site); l.add(ptm); l.add(enzyme); Map<String, Object> m = new HashMap(); if (keyword != null) m.put("keyword", keyword); if (description != null) m.put("description", description); if (status != null) m.put("status", status); l.add(m); sites.add(l); } } }); BufferedInputStream bis = new BufferedInputStream(is); try { saxReader.read(bis); } catch (DocumentException e) { throw new IOException(e.getMessage()); } return result; }