List of usage examples for the org.dom4j ElementPath.getCurrent() method.
Element getCurrent();
From source file:de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer.java
License:Apache License
/**
 * Invoked when the parser enters an element. Cleans the element's attributes
 * (drops "NULL" placeholders, repairs non-UTF-8 characters) and, for the
 * structural elements LexicalResource / Lexicon / GlobalInformation, creates
 * and persists the corresponding UBY-LMF objects via the Hibernate session.
 * NOTE(review): relies on fields of the enclosing transformer
 * (lexicalResource, lexicon, session, externalLexicalResource) not visible here.
 */
@Override public void onStart(ElementPath epath) {
    Element el = epath.getCurrent();
    String n = el.getName();
    // Remove empty attributes and invalid characters.
    Iterator<?> attrIter = el.attributeIterator();
    while (attrIter.hasNext()) {
        Attribute attr = (Attribute) attrIter.next();
        if ("NULL".equals(attr.getStringValue())) {
            // "NULL" is a placeholder for a missing value — drop the attribute entirely.
            attrIter.remove();
        } else {
            attr.setValue(StringUtils.replaceNonUtf8(attr.getValue()));
        }
    }
    if ("LexicalResource".equals(n)) {
        // If no lexical resource exists yet, create a new one.
        if (lexicalResource == null) {
            lexicalResource = new LexicalResource();
            lexicalResource.setName(el.attributeValue("name"));
            lexicalResource.setDtdVersion(el.attributeValue("dtdVersion"));
            session.save(lexicalResource);
        } else {
            // A resource already exists in the DB; remember that so we don't
            // overwrite its global information below.
            externalLexicalResource = true;
        }
    } else if ("Lexicon".equals(n)) {
        // Create a new, empty lexicon.
        lexicon = new Lexicon();
        lexicon.setId(el.attributeValue("id"));
        lexicon.setName(el.attributeValue("name"));
        lexicon.setLanguageIdentifier(el.attributeValue("languageIdentifier"));
        lexicalResource.addLexicon(lexicon);
        saveCascade(lexicon, lexicalResource);
    }
    // Save some global information if we're using a new lexical resource.
    else if ("GlobalInformation".equals(n) && !externalLexicalResource) {
        GlobalInformation glInformation = new GlobalInformation();
        glInformation.setLabel(el.attributeValue("label"));
        lexicalResource.setGlobalInformation(glInformation);
        saveCascade(glInformation, lexicalResource);
        commit();
        // Detach the global information again after committing so it is not
        // cascaded a second time later on.
        lexicalResource.setGlobalInformation(null);
    }
}
From source file:de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer.java
License:Apache License
/**
 * Invoked when an element has been completely parsed. Converts the recognized
 * direct children of Lexicon and LexicalResource into UBY-LMF objects,
 * persists them via saveListElement, and detaches the processed XML element
 * so the in-memory DOM stays small while streaming large files.
 */
@Override public void onEnd(ElementPath epath) {
    Element el = epath.getCurrent();
    String n = el.getName();
    Object listElement = null;
    // Create instances for all direct children of Lexicon.
    if ("LexicalEntry".equals(n)) {
        listElement = fromXmlToObject(el, LexicalEntry.class);
        saveListElement(lexicon, lexicon.getLexicalEntries(), listElement);
    } else if ("SemanticPredicate".equals(n)) {
        listElement = fromXmlToObject(el, SemanticPredicate.class);
        saveListElement(lexicon, lexicon.getSemanticPredicates(), listElement);
    } else if ("SubcategorizationFrame".equals(n)) {
        listElement = fromXmlToObject(el, SubcategorizationFrame.class);
        saveListElement(lexicon, lexicon.getSubcategorizationFrames(), listElement);
    } else if ("SubcategorizationFrameSet".equals(n)) {
        listElement = fromXmlToObject(el, SubcategorizationFrameSet.class);
        saveListElement(lexicon, lexicon.getSubcategorizationFrameSets(), listElement);
    } else if ("SynSemCorrespondence".equals(n)) {
        listElement = fromXmlToObject(el, SynSemCorrespondence.class);
        saveListElement(lexicon, lexicon.getSynSemCorrespondences(), listElement);
    } else if ("Synset".equals(n)) {
        listElement = fromXmlToObject(el, Synset.class);
        saveListElement(lexicon, lexicon.getSynsets(), listElement);
    } else if ("ConstraintSet".equals(n)) {
        listElement = fromXmlToObject(el, ConstraintSet.class);
        saveListElement(lexicon, lexicon.getConstraintSets(), listElement);
    } else
    // Create instances for all direct children of LexicalResource.
    if ("SenseAxis".equals(n)) {
        listElement = fromXmlToObject(el, SenseAxis.class);
        saveListElement(lexicalResource, lexicalResource.getSenseAxes(), listElement);
    } else if ("PredicateArgumentAxis".equals(n)) {
        listElement = fromXmlToObject(el, PredicateArgumentAxis.class);
        saveListElement(lexicalResource, lexicalResource.getPredicateArgumentAxes(), listElement);
    } else if ("MetaData".equals(n)) {
        listElement = fromXmlToObject(el, MetaData.class);
        saveListElement(lexicalResource, lexicalResource.getMetaData(), listElement);
    }
    // Forget the corresponding XML elements of the saved instances.
    if (listElement != null) {
        el.detach();
    }
}
From source file:galign.helpers.tmx.TmxFile.java
License:Apache License
/**
 * Reads and validates a TMX XML string.
 *
 * Registers streaming handlers on the SAX reader so the TMX version, the
 * header, and each translation unit ("tu") are captured as soon as they are
 * parsed; header and tu elements are detached afterwards to conserve memory
 * on large files.
 */
protected void init(SAXReader p_reader, InputSource p_input) throws org.dom4j.DocumentException {
    SAXReader reader = p_reader;
    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx", new ElementHandler() {
        final public void onStart(ElementPath path) {
            Element element = path.getCurrent();
            // Capture the TMX version from the root element's attribute.
            m_tmxVersion = element.attributeValue("version");
        }
        final public void onEnd(ElementPath path) {
        }
    });
    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/header", new ElementHandler() {
        final public void onStart(ElementPath path) {
        }
        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            m_header = new TmxHeader(element);
            m_header.setTmxVersion(m_tmxVersion);
            // prune the current element to reduce memory
            element.detach();
            element = null;
        }
    });
    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        final public void onStart(ElementPath path) {
        }
        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            addTu(new Tu(element));
            // prune the current element to reduce memory
            element.detach();
            element = null;
        }
    });
    // Parsing runs here; all data is collected by the handlers above.
    Document document = reader.read(p_input);
    // all done.
}
From source file:musite.io.xml.UniProtXMLReader.java
License:Open Source License
public Proteins read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }//w ww . ja v a 2s . co m final Proteins result = data == null ? new ProteinsImpl() : data; SAXReader saxReader = new SAXReader(); final StringBuilder acc = new StringBuilder(30); final StringBuilder name = new StringBuilder(30); final StringBuilder fullName = new StringBuilder(200); final StringBuilder org = new StringBuilder(30); final StringBuilder seq = new StringBuilder(2000); final List<List> sites = new ArrayList(4); // location, ptm, enzyme, annotation final Set<String> accs = new HashSet(); // entry saxReader.addHandler("/uniprot/entry", new ElementHandler() { public void onStart(ElementPath path) { acc.setLength(0); fullName.setLength(0); seq.setLength(0); org.setLength(0); name.setLength(0); sites.clear(); accs.clear(); } public void onEnd(ElementPath path) { // process a element if (org.length() > 0 && (organismFilter == null || organismFilter.contains(org.toString())) && acc.length() > 0 && seq.length() > 0) { String accession = acc.toString(); String sequence = seq.toString(); ProteinImpl protein = new ProteinImpl(acc.toString(), sequence, name.length() == 0 ? null : name.toString(), fullName.length() == 0 ? null : fullName.toString(), org.length() == 0 ? 
null : org.toString()); result.addProtein(protein); for (List l : sites) { Integer site = (Integer) l.get(0); PTM ptm = (PTM) l.get(1); String enzyme = (String) l.get(2); if (enzyme != null && enzyme.equalsIgnoreCase("autocatalysis")) { enzyme = name.toString(); } Map ann = (Map) l.get(3); try { PTMAnnotationUtil.annotate(protein, site, ptm, enzyme, ann); } catch (Exception e) { e.printStackTrace(); } } if (keepAllIds) { for (String ac : accs) { mapIdMainId.put(ac, accession); } if (!accs.isEmpty()) protein.putInfo("other-accessions", new HashSet(accs)); } //System.out.println(accession); } // prune the tree Element row = path.getCurrent(); row.detach(); } }); // accession saxReader.addHandler("/uniprot/entry/accession", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (acc.length() == 0) { Element el = path.getCurrent(); acc.append(el.getText()); // if (keepAllIds) { // accs.add(acc.toString()); // } } else { if (keepAllIds) { accs.add(path.getCurrent().getText()); } } } }); // name saxReader.addHandler("/uniprot/entry/name", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (name.length() > 0) return; Element el = path.getCurrent(); name.append(el.getText()); } }); // full name saxReader.addHandler("/uniprot/entry/protein/recommendedName/fullName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (fullName.length() > 0) return; Element el = path.getCurrent(); fullName.append(el.getTextTrim()); } }); saxReader.addHandler("/uniprot/entry/organism/name", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (org.length() > 0) return; Element el = path.getCurrent(); String attr = el.attributeValue("type"); if (attr == null || !attr.equalsIgnoreCase("scientific")) { return; } 
org.append(el.getText()); } }); saxReader.addHandler("/uniprot/entry/sequence", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (seq.length() > 0) return; Element el = path.getCurrent(); seq.append(el.getText().replaceAll("\\p{Space}", "")); } }); saxReader.addHandler("/uniprot/entry/feature", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element el = path.getCurrent(); String type = el.attributeValue("type"); if (type == null) return; PTM ptm = null; String enzyme = null; String description = null; String keyword = null; if (UNIPROT_TYPES.contains(type.toLowerCase())) { description = el.attributeValue("description"); if (description == null) return; String[] descs = description.split("; "); for (String desc : descs) { PTM tmp = PTM.ofKeyword(desc); if (tmp != null) { ptm = tmp; keyword = desc; } else if (desc.startsWith("by ")) { enzyme = desc.substring(3); } } } // else if (type.equalsIgnoreCase("glycosylation site")) { // description = el.attributeValue("description"); // ptm = PTM.GLYCOSYLATION; // } // else if (type.equalsIgnoreCase()) { // description = el.attributeValue("description"); // String[] descs = description.split("; "); // for (String desc : descs) { // PTM tmp = PTM.ofKeyword(desc); // if (tmp != null) { // ptm = tmp; // keyword = desc; // } else if (desc.startsWith("by ")) { // enzyme = desc.substring(3); // } // } // } if (ptm == null || (ptmFilter != null && !ptmFilter.contains(ptm))) return; String status = el.attributeValue("status"); if (status != null) { if (!includeBySimilarity && status.equalsIgnoreCase("By similarity")) return; if (!includeProbable && status.equalsIgnoreCase("Probable")) return; if (!includePotential && status.equalsIgnoreCase("Potential")) return; } int site = -1; List<Element> locs = el.elements("location"); for (Element loc : locs) { List<Element> poss = 
loc.elements("position"); for (Element pos : poss) { String str = pos.attributeValue("position"); if (str == null) continue; try { site = Integer.parseInt(str) - 1; //start from 0 } catch (NumberFormatException e) { continue; } } } if (site != -1) { List l = new ArrayList(); l.add(site); l.add(ptm); l.add(enzyme); Map<String, Object> m = new HashMap(); if (keyword != null) m.put("keyword", keyword); if (description != null) m.put("description", description); if (status != null) m.put("status", status); l.add(m); sites.add(l); } } }); BufferedInputStream bis = new BufferedInputStream(is); try { saxReader.read(bis); } catch (DocumentException e) { throw new IOException(e.getMessage()); } return result; }
From source file:musite.taxonomy.UniprotTaxonomyXMLReader.java
License:Open Source License
public TaxonomyTree read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }// w w w . j av a 2 s.c o m final TaxonomyTree tree = new TaxonomyTree(); SAXReader saxReader = new SAXReader(); final TaxonomyNode currentNode = new TaxonomyNode(); // entry saxReader.addHandler("/RDF/Description", new ElementHandler() { public void onStart(ElementPath path) { currentNode.clearMembers(); Element element = path.getCurrent(); Attribute attribute = element.attribute("about"); String TaxonomyID = attribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, ""); currentNode.setIdentifier(TaxonomyID); } public void onEnd(ElementPath path) { // process an element //create a new node TaxonomyNode node = tree.getTaxonomyNode(currentNode.getIdentifier()); if (node == null) { node = new TaxonomyNode(); currentNode.copyMembersTo(node); tree.addtoNodelist(node); } else { currentNode.copyMembersTo(node); } //change the parent from currentNode to node ArrayList<TaxonomyNode> parentlist = node.getParents(); for (int i = 0; i < parentlist.size(); i++) { TaxonomyNode parent = parentlist.get(i); parent.getChildren().add(node); } // prune the tree Element row = path.getCurrent(); row.detach(); } }); // type saxReader.addHandler("/RDF/Description/type", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element typeElement = (Element) path.getCurrent(); Attribute typeAttribute = typeElement.attribute("resource"); String Type = typeAttribute.getValue().replaceAll(UniprotTaxonomySettings.TYPE_ADDRESS, ""); currentNode.setType(Type); } }); // rank saxReader.addHandler("/RDF/Description/rank", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element rankElement = (Element) path.getCurrent(); Attribute rankAttribute = rankElement.attribute("resource"); String Rank = 
rankAttribute.getValue().replaceAll(UniprotTaxonomySettings.RANK_ADDRESS, ""); currentNode.setRank(Rank); } }); // scientificName saxReader.addHandler("/RDF/Description/scientificName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element scientificnameElement = (Element) path.getCurrent(); String ScientificName = scientificnameElement.getText(); currentNode.setScientificName(ScientificName); } }); // otherName saxReader.addHandler("/RDF/Description/otherName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element othernameElement = (Element) path.getCurrent(); String tempname = othernameElement.getText(); currentNode.addOthernames(tempname); } }); // partOfLineage saxReader.addHandler("/RDF/Description/partOfLineage", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element lineageElement = (Element) path.getCurrent(); String temptext = lineageElement.getText(); boolean partOfLineage; if (temptext.equals("true")) { partOfLineage = true; } else partOfLineage = false; currentNode.setPartOfLineage(partOfLineage); } }); // Add parent saxReader.addHandler("/RDF/Description/subClassOf", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element subclassElement = (Element) path.getCurrent(); Attribute subclassAttribute = subclassElement.attribute("resource"); String subclassID = subclassAttribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, ""); TaxonomyNode parent = tree.getTaxonomyNode(subclassID); if (parent != null) { currentNode.addParentOnly(parent); } else { parent = new TaxonomyNode(); parent.setIdentifier(subclassID); tree.addtoNodelist(parent); currentNode.addParentOnly(parent); } } }); BufferedInputStream bis = new BufferedInputStream(is); Document doc; try { doc = 
saxReader.read(bis); } catch (DocumentException e) { throw new IOException(e.getMessage()); } tree.searchRoot(); return tree; }
From source file:org.dom4j.samples.LargeDocumentDemo.java
License:Open Source License
/** Callback fired when the parser begins an element; logs which element is starting. */
public void onStart(ElementPath path) {
    // Report the element whose subtree parsing has just begun.
    println("onStart: of parsing element: " + path.getCurrent());
}
From source file:org.dom4j.samples.LargeDocumentDemo.java
License:Open Source License
public void onEnd(ElementPath path) { Element element = path.getCurrent(); println("onEnd: of parsing element: " + element + " with: " + element.content().size() + " content node(s)"); // now prune the current element to reduce memory element.detach();//from ww w . j a v a 2s.c om }
From source file:org.localmatters.serializer.config.SerializationElementHandler.java
License:Apache License
/**
 * Processes the fully parsed configuration root: resolves ids, references and
 * extensions, and fails fast on any that cannot be resolved.
 *
 * @see org.dom4j.ElementHandler#onEnd(org.dom4j.ElementPath)
 */
@SuppressWarnings("unchecked")
public void onEnd(ElementPath elementPath) {
    Element configRoot = elementPath.getCurrent();
    // Handle every top-level child, each starting from an empty attribute map.
    for (Element child : (List<Element>) configRoot.elements()) {
        handleId(child, new HashMap<String, String>(), true);
    }
    // Collect ids that could not be resolved from references and extensions alike.
    Set<String> unresolved = resolveReferences(getReferences(), getSerializations());
    unresolved.addAll(resolveExtensions(getExtensions(), getComplexWithIds()));
    if (CollectionUtils.isNotEmpty(unresolved)) {
        throw new ConfigurationException(INVALID_ID_FORMAT, unresolved);
    }
}
From source file:org.orbeon.oxf.xml.dom4j.LocationSAXContentHandler.java
License:Open Source License
/**
 * Forwards the SAX start-element event to the base handler, then attaches
 * source-location data (when the locator provides any) to the element that
 * was just pushed onto the element stack.
 */
public void startElement(String namespaceURI, String localName, String qualifiedName, Attributes attributes)
        throws SAXException {
    super.startElement(namespaceURI, localName, qualifiedName, attributes);
    final LocationData locationData = LocationData.createIfPresent(locator);
    if (locationData == null) {
        return; // no location information available for this event
    }
    getElementStack().getCurrent().setData(locationData);
}
From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java
License:Apache License
/**
 * Builds the dom4j Document for this step from one of four sources: an XML
 * string field, a VFS URL, an HTTP URL (with optional gzip encoding), or a
 * file. When a prune path is configured, rows are processed in streaming
 * mode inside the element handler and detached immediately.
 *
 * Returns true on success; returns false (without raising) when parsing was
 * deliberately aborted by the stop-pruning trick below. Any other failure is
 * wrapped in a KettleException.
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {
    this.prevRow = buildEmptyRow(); // pre-allocate previous row
    try {
        SAXReader reader = XMLParserFactoryProducer.getSAXReader(null);
        data.stopPruning = false;
        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        } else {
            // Ignore DTD declarations
            reader.setEntityResolver(new IgnoreDTDEntityResolver());
        }
        // Ignore comments?
        if (meta.isIgnoreComments()) {
            reader.setIgnoreComments(true);
        }
        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all is processed in the handler
            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated"));
            }
            if (data.PathValue.equals(data.prunePath)) {
                // Edge case, but if true, there will only ever be one item in the list
                data.an = new ArrayList<>(1); // pre-allocate array and sizes
                data.an.add(null);
            }
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }
                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // when a large file is processed and it should be stopped it is still reading the hole thing
                        // the only solution I see is to prune / detach the document and this will lead into a
                        // NPE or other errors depending on the parsing location - this will be treated in the catch part below
                        // any better idea is welcome
                        if (log.isBasic()) {
                            logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped"));
                        }
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }
                    // process a ROW element
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing"));
                    }
                    Element row = path.getCurrent();
                    try {
                        // Pass over the row instead of just the document. If
                        // if there's only one row, there's no need to
                        // go back to the whole document.
                        processStreaming(row);
                    } catch (Exception e) {
                        // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing"));
                    }
                }
            });
        }
        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl && KettleVFS.startsWithScheme(StringXML)) {
            // read from a VFS-resolvable URL
            data.document = reader.read(KettleVFS.getInputStream(StringXML));
        } else if (readurl) {
            // read url as source
            HttpClient client = HttpClientManager.getInstance().createDefaultClient();
            HttpGet method = new HttpGet(StringXML);
            method.addHeader("Accept-Encoding", "gzip");
            HttpResponse response = client.execute(method);
            Header contentEncoding = response.getFirstHeader("Content-Encoding");
            HttpEntity responseEntity = response.getEntity();
            if (responseEntity != null) {
                if (contentEncoding != null) {
                    // NOTE(review): when Content-Encoding is present but not gzip,
                    // no document is read in this branch — confirm this is intended.
                    String acceptEncodingValue = contentEncoding.getValue();
                    if (acceptEncodingValue.contains("gzip")) {
                        GZIPInputStream in = new GZIPInputStream(responseEntity.getContent());
                        data.document = reader.read(in);
                    }
                } else {
                    data.document = reader.read(responseEntity.getContent());
                }
            }
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Utils.isEmpty(meta.getEncoding())) {
                encoding = meta.getEncoding();
            }
            InputStream is = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(is, encoding);
            } finally {
                BaseStep.closeQuietly(is);
            }
        }
        if (meta.isNamespaceAware()) {
            prepareNSMap(data.document.getRootElement());
        }
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}