List of usage examples for the org.dom4j ElementPath.getCurrent() method.
Element getCurrent();
From source file:de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer.java
License:Apache License
/**
 * Invoked when the parser enters an element. Cleans the element's attributes
 * (drops "NULL" placeholders, repairs non-UTF-8 characters) and, for the
 * structural elements LexicalResource / Lexicon / GlobalInformation, creates
 * and persists the corresponding UBY-LMF objects via the Hibernate session.
 * NOTE(review): relies on fields of the enclosing transformer
 * (lexicalResource, lexicon, session, externalLexicalResource) not visible here.
 */
@Override public void onStart(ElementPath epath) {
    Element el = epath.getCurrent();
    String n = el.getName();
    // Remove empty attributes and invalid characters.
    Iterator<?> attrIter = el.attributeIterator();
    while (attrIter.hasNext()) {
        Attribute attr = (Attribute) attrIter.next();
        if ("NULL".equals(attr.getStringValue())) {
            // "NULL" is a placeholder for a missing value — drop the attribute entirely.
            attrIter.remove();
        } else {
            attr.setValue(StringUtils.replaceNonUtf8(attr.getValue()));
        }
    }
    if ("LexicalResource".equals(n)) {
        // If no lexical resource exists yet, create a new one.
        if (lexicalResource == null) {
            lexicalResource = new LexicalResource();
            lexicalResource.setName(el.attributeValue("name"));
            lexicalResource.setDtdVersion(el.attributeValue("dtdVersion"));
            session.save(lexicalResource);
        } else {
            // A resource already exists in the DB; remember that so we don't
            // overwrite its global information below.
            externalLexicalResource = true;
        }
    } else if ("Lexicon".equals(n)) {
        // Create a new, empty lexicon.
        lexicon = new Lexicon();
        lexicon.setId(el.attributeValue("id"));
        lexicon.setName(el.attributeValue("name"));
        lexicon.setLanguageIdentifier(el.attributeValue("languageIdentifier"));
        lexicalResource.addLexicon(lexicon);
        saveCascade(lexicon, lexicalResource);
    }
    // Save some global information if we're using a new lexical resource.
    else if ("GlobalInformation".equals(n) && !externalLexicalResource) {
        GlobalInformation glInformation = new GlobalInformation();
        glInformation.setLabel(el.attributeValue("label"));
        lexicalResource.setGlobalInformation(glInformation);
        saveCascade(glInformation, lexicalResource);
        commit();
        // Detach the global information again after committing so it is not
        // cascaded a second time later on.
        lexicalResource.setGlobalInformation(null);
    }
}
From source file:de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer.java
License:Apache License
/**
 * Invoked when an element has been completely parsed. Converts the recognized
 * direct children of Lexicon and LexicalResource into UBY-LMF objects,
 * persists them via saveListElement, and detaches the processed XML element
 * so the in-memory DOM stays small while streaming large files.
 */
@Override public void onEnd(ElementPath epath) {
    Element el = epath.getCurrent();
    String n = el.getName();
    Object listElement = null;
    // Create instances for all direct children of Lexicon.
    if ("LexicalEntry".equals(n)) {
        listElement = fromXmlToObject(el, LexicalEntry.class);
        saveListElement(lexicon, lexicon.getLexicalEntries(), listElement);
    } else if ("SemanticPredicate".equals(n)) {
        listElement = fromXmlToObject(el, SemanticPredicate.class);
        saveListElement(lexicon, lexicon.getSemanticPredicates(), listElement);
    } else if ("SubcategorizationFrame".equals(n)) {
        listElement = fromXmlToObject(el, SubcategorizationFrame.class);
        saveListElement(lexicon, lexicon.getSubcategorizationFrames(), listElement);
    } else if ("SubcategorizationFrameSet".equals(n)) {
        listElement = fromXmlToObject(el, SubcategorizationFrameSet.class);
        saveListElement(lexicon, lexicon.getSubcategorizationFrameSets(), listElement);
    } else if ("SynSemCorrespondence".equals(n)) {
        listElement = fromXmlToObject(el, SynSemCorrespondence.class);
        saveListElement(lexicon, lexicon.getSynSemCorrespondences(), listElement);
    } else if ("Synset".equals(n)) {
        listElement = fromXmlToObject(el, Synset.class);
        saveListElement(lexicon, lexicon.getSynsets(), listElement);
    } else if ("ConstraintSet".equals(n)) {
        listElement = fromXmlToObject(el, ConstraintSet.class);
        saveListElement(lexicon, lexicon.getConstraintSets(), listElement);
    } else
    // Create instances for all direct children of LexicalResource.
    if ("SenseAxis".equals(n)) {
        listElement = fromXmlToObject(el, SenseAxis.class);
        saveListElement(lexicalResource, lexicalResource.getSenseAxes(), listElement);
    } else if ("PredicateArgumentAxis".equals(n)) {
        listElement = fromXmlToObject(el, PredicateArgumentAxis.class);
        saveListElement(lexicalResource, lexicalResource.getPredicateArgumentAxes(), listElement);
    } else if ("MetaData".equals(n)) {
        listElement = fromXmlToObject(el, MetaData.class);
        saveListElement(lexicalResource, lexicalResource.getMetaData(), listElement);
    }
    // Forget the corresponding XML elements of the saved instances.
    if (listElement != null) {
        el.detach();
    }
}
From source file:galign.helpers.tmx.TmxFile.java
License:Apache License
/**
 * Reads and validates a TMX XML string.
 *
 * Registers streaming handlers on the SAX reader so the TMX version, the
 * header, and each translation unit ("tu") are captured as soon as they are
 * parsed; header and tu elements are detached afterwards to conserve memory
 * on large files.
 */
protected void init(SAXReader p_reader, InputSource p_input) throws org.dom4j.DocumentException {
    SAXReader reader = p_reader;
    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx", new ElementHandler() {
        final public void onStart(ElementPath path) {
            Element element = path.getCurrent();
            // Capture the TMX version from the root element's attribute.
            m_tmxVersion = element.attributeValue("version");
        }
        final public void onEnd(ElementPath path) {
        }
    });
    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/header", new ElementHandler() {
        final public void onStart(ElementPath path) {
        }
        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            m_header = new TmxHeader(element);
            m_header.setTmxVersion(m_tmxVersion);
            // prune the current element to reduce memory
            element.detach();
            element = null;
        }
    });
    // enable element complete notifications to conserve memory
    reader.addHandler("/tmx/body/tu", new ElementHandler() {
        final public void onStart(ElementPath path) {
        }
        final public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            addTu(new Tu(element));
            // prune the current element to reduce memory
            element.detach();
            element = null;
        }
    });
    // Parsing runs here; all data is collected by the handlers above.
    Document document = reader.read(p_input);
    // all done.
}
From source file:musite.io.xml.UniProtXMLReader.java
License:Open Source License
public Proteins read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }//w ww . ja v a 2s . co m final Proteins result = data == null ? new ProteinsImpl() : data; SAXReader saxReader = new SAXReader(); final StringBuilder acc = new StringBuilder(30); final StringBuilder name = new StringBuilder(30); final StringBuilder fullName = new StringBuilder(200); final StringBuilder org = new StringBuilder(30); final StringBuilder seq = new StringBuilder(2000); final List<List> sites = new ArrayList(4); // location, ptm, enzyme, annotation final Set<String> accs = new HashSet(); // entry saxReader.addHandler("/uniprot/entry", new ElementHandler() { public void onStart(ElementPath path) { acc.setLength(0); fullName.setLength(0); seq.setLength(0); org.setLength(0); name.setLength(0); sites.clear(); accs.clear(); } public void onEnd(ElementPath path) { // process a element if (org.length() > 0 && (organismFilter == null || organismFilter.contains(org.toString())) && acc.length() > 0 && seq.length() > 0) { String accession = acc.toString(); String sequence = seq.toString(); ProteinImpl protein = new ProteinImpl(acc.toString(), sequence, name.length() == 0 ? null : name.toString(), fullName.length() == 0 ? null : fullName.toString(), org.length() == 0 ? 
null : org.toString()); result.addProtein(protein); for (List l : sites) { Integer site = (Integer) l.get(0); PTM ptm = (PTM) l.get(1); String enzyme = (String) l.get(2); if (enzyme != null && enzyme.equalsIgnoreCase("autocatalysis")) { enzyme = name.toString(); } Map ann = (Map) l.get(3); try { PTMAnnotationUtil.annotate(protein, site, ptm, enzyme, ann); } catch (Exception e) { e.printStackTrace(); } } if (keepAllIds) { for (String ac : accs) { mapIdMainId.put(ac, accession); } if (!accs.isEmpty()) protein.putInfo("other-accessions", new HashSet(accs)); } //System.out.println(accession); } // prune the tree Element row = path.getCurrent(); row.detach(); } }); // accession saxReader.addHandler("/uniprot/entry/accession", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (acc.length() == 0) { Element el = path.getCurrent(); acc.append(el.getText()); // if (keepAllIds) { // accs.add(acc.toString()); // } } else { if (keepAllIds) { accs.add(path.getCurrent().getText()); } } } }); // name saxReader.addHandler("/uniprot/entry/name", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (name.length() > 0) return; Element el = path.getCurrent(); name.append(el.getText()); } }); // full name saxReader.addHandler("/uniprot/entry/protein/recommendedName/fullName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (fullName.length() > 0) return; Element el = path.getCurrent(); fullName.append(el.getTextTrim()); } }); saxReader.addHandler("/uniprot/entry/organism/name", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (org.length() > 0) return; Element el = path.getCurrent(); String attr = el.attributeValue("type"); if (attr == null || !attr.equalsIgnoreCase("scientific")) { return; } 
org.append(el.getText()); } }); saxReader.addHandler("/uniprot/entry/sequence", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (seq.length() > 0) return; Element el = path.getCurrent(); seq.append(el.getText().replaceAll("\\p{Space}", "")); } }); saxReader.addHandler("/uniprot/entry/feature", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element el = path.getCurrent(); String type = el.attributeValue("type"); if (type == null) return; PTM ptm = null; String enzyme = null; String description = null; String keyword = null; if (UNIPROT_TYPES.contains(type.toLowerCase())) { description = el.attributeValue("description"); if (description == null) return; String[] descs = description.split("; "); for (String desc : descs) { PTM tmp = PTM.ofKeyword(desc); if (tmp != null) { ptm = tmp; keyword = desc; } else if (desc.startsWith("by ")) { enzyme = desc.substring(3); } } } // else if (type.equalsIgnoreCase("glycosylation site")) { // description = el.attributeValue("description"); // ptm = PTM.GLYCOSYLATION; // } // else if (type.equalsIgnoreCase()) { // description = el.attributeValue("description"); // String[] descs = description.split("; "); // for (String desc : descs) { // PTM tmp = PTM.ofKeyword(desc); // if (tmp != null) { // ptm = tmp; // keyword = desc; // } else if (desc.startsWith("by ")) { // enzyme = desc.substring(3); // } // } // } if (ptm == null || (ptmFilter != null && !ptmFilter.contains(ptm))) return; String status = el.attributeValue("status"); if (status != null) { if (!includeBySimilarity && status.equalsIgnoreCase("By similarity")) return; if (!includeProbable && status.equalsIgnoreCase("Probable")) return; if (!includePotential && status.equalsIgnoreCase("Potential")) return; } int site = -1; List<Element> locs = el.elements("location"); for (Element loc : locs) { List<Element> poss = 
loc.elements("position"); for (Element pos : poss) { String str = pos.attributeValue("position"); if (str == null) continue; try { site = Integer.parseInt(str) - 1; //start from 0 } catch (NumberFormatException e) { continue; } } } if (site != -1) { List l = new ArrayList(); l.add(site); l.add(ptm); l.add(enzyme); Map<String, Object> m = new HashMap(); if (keyword != null) m.put("keyword", keyword); if (description != null) m.put("description", description); if (status != null) m.put("status", status); l.add(m); sites.add(l); } } }); BufferedInputStream bis = new BufferedInputStream(is); try { saxReader.read(bis); } catch (DocumentException e) { throw new IOException(e.getMessage()); } return result; }
From source file:musite.taxonomy.UniprotTaxonomyXMLReader.java
License:Open Source License
public TaxonomyTree read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }// w w w . j av a 2 s.c o m final TaxonomyTree tree = new TaxonomyTree(); SAXReader saxReader = new SAXReader(); final TaxonomyNode currentNode = new TaxonomyNode(); // entry saxReader.addHandler("/RDF/Description", new ElementHandler() { public void onStart(ElementPath path) { currentNode.clearMembers(); Element element = path.getCurrent(); Attribute attribute = element.attribute("about"); String TaxonomyID = attribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, ""); currentNode.setIdentifier(TaxonomyID); } public void onEnd(ElementPath path) { // process an element //create a new node TaxonomyNode node = tree.getTaxonomyNode(currentNode.getIdentifier()); if (node == null) { node = new TaxonomyNode(); currentNode.copyMembersTo(node); tree.addtoNodelist(node); } else { currentNode.copyMembersTo(node); } //change the parent from currentNode to node ArrayList<TaxonomyNode> parentlist = node.getParents(); for (int i = 0; i < parentlist.size(); i++) { TaxonomyNode parent = parentlist.get(i); parent.getChildren().add(node); } // prune the tree Element row = path.getCurrent(); row.detach(); } }); // type saxReader.addHandler("/RDF/Description/type", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element typeElement = (Element) path.getCurrent(); Attribute typeAttribute = typeElement.attribute("resource"); String Type = typeAttribute.getValue().replaceAll(UniprotTaxonomySettings.TYPE_ADDRESS, ""); currentNode.setType(Type); } }); // rank saxReader.addHandler("/RDF/Description/rank", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element rankElement = (Element) path.getCurrent(); Attribute rankAttribute = rankElement.attribute("resource"); String Rank = 
rankAttribute.getValue().replaceAll(UniprotTaxonomySettings.RANK_ADDRESS, ""); currentNode.setRank(Rank); } }); // scientificName saxReader.addHandler("/RDF/Description/scientificName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element scientificnameElement = (Element) path.getCurrent(); String ScientificName = scientificnameElement.getText(); currentNode.setScientificName(ScientificName); } }); // otherName saxReader.addHandler("/RDF/Description/otherName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element othernameElement = (Element) path.getCurrent(); String tempname = othernameElement.getText(); currentNode.addOthernames(tempname); } }); // partOfLineage saxReader.addHandler("/RDF/Description/partOfLineage", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element lineageElement = (Element) path.getCurrent(); String temptext = lineageElement.getText(); boolean partOfLineage; if (temptext.equals("true")) { partOfLineage = true; } else partOfLineage = false; currentNode.setPartOfLineage(partOfLineage); } }); // Add parent saxReader.addHandler("/RDF/Description/subClassOf", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element subclassElement = (Element) path.getCurrent(); Attribute subclassAttribute = subclassElement.attribute("resource"); String subclassID = subclassAttribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, ""); TaxonomyNode parent = tree.getTaxonomyNode(subclassID); if (parent != null) { currentNode.addParentOnly(parent); } else { parent = new TaxonomyNode(); parent.setIdentifier(subclassID); tree.addtoNodelist(parent); currentNode.addParentOnly(parent); } } }); BufferedInputStream bis = new BufferedInputStream(is); Document doc; try { doc = 
saxReader.read(bis); } catch (DocumentException e) { throw new IOException(e.getMessage()); } tree.searchRoot(); return tree; }
From source file:org.dom4j.samples.LargeDocumentDemo.java
License:Open Source License
/** Callback fired when the parser begins an element; logs which element is starting. */
public void onStart(ElementPath path) {
    // Report the element whose subtree parsing has just begun.
    println("onStart: of parsing element: " + path.getCurrent());
}
From source file:org.dom4j.samples.LargeDocumentDemo.java
License:Open Source License
public void onEnd(ElementPath path) { Element element = path.getCurrent(); println("onEnd: of parsing element: " + element + " with: " + element.content().size() + " content node(s)"); // now prune the current element to reduce memory element.detach();//from ww w . j a v a 2s.c om }
From source file:org.localmatters.serializer.config.SerializationElementHandler.java
License:Apache License
/**
 * Processes the fully parsed configuration root: resolves ids, references and
 * extensions, and fails fast on any that cannot be resolved.
 *
 * @see org.dom4j.ElementHandler#onEnd(org.dom4j.ElementPath)
 */
@SuppressWarnings("unchecked")
public void onEnd(ElementPath elementPath) {
    Element configRoot = elementPath.getCurrent();
    // Handle every top-level child, each starting from an empty attribute map.
    for (Element child : (List<Element>) configRoot.elements()) {
        handleId(child, new HashMap<String, String>(), true);
    }
    // Collect ids that could not be resolved from references and extensions alike.
    Set<String> unresolved = resolveReferences(getReferences(), getSerializations());
    unresolved.addAll(resolveExtensions(getExtensions(), getComplexWithIds()));
    if (CollectionUtils.isNotEmpty(unresolved)) {
        throw new ConfigurationException(INVALID_ID_FORMAT, unresolved);
    }
}
From source file:org.orbeon.oxf.xml.dom4j.LocationSAXContentHandler.java
License:Open Source License
/**
 * Forwards the SAX start-element event to the base handler, then attaches
 * source-location data (when the locator provides any) to the element that
 * was just pushed onto the element stack.
 */
public void startElement(String namespaceURI, String localName, String qualifiedName, Attributes attributes)
        throws SAXException {
    super.startElement(namespaceURI, localName, qualifiedName, attributes);
    final LocationData locationData = LocationData.createIfPresent(locator);
    if (locationData == null) {
        return; // no location information available for this event
    }
    getElementStack().getCurrent().setData(locationData);
}
From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java
License:Apache License
/**
 * Builds the dom4j Document for this step from one of four sources: an XML
 * string field, a VFS URL, an HTTP URL (with optional gzip encoding), or a
 * file. When a prune path is configured, rows are processed in streaming
 * mode inside the element handler and detached immediately.
 *
 * Returns true on success; returns false (without raising) when parsing was
 * deliberately aborted by the stop-pruning trick below. Any other failure is
 * wrapped in a KettleException.
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {
    this.prevRow = buildEmptyRow(); // pre-allocate previous row
    try {
        SAXReader reader = XMLParserFactoryProducer.getSAXReader(null);
        data.stopPruning = false;
        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        } else {
            // Ignore DTD declarations
            reader.setEntityResolver(new IgnoreDTDEntityResolver());
        }
        // Ignore comments?
        if (meta.isIgnoreComments()) {
            reader.setIgnoreComments(true);
        }
        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all is processed in the handler
            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated"));
            }
            if (data.PathValue.equals(data.prunePath)) {
                // Edge case, but if true, there will only ever be one item in the list
                data.an = new ArrayList<>(1); // pre-allocate array and sizes
                data.an.add(null);
            }
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }
                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // when a large file is processed and it should be stopped it is still reading the hole thing
                        // the only solution I see is to prune / detach the document and this will lead into a
                        // NPE or other errors depending on the parsing location - this will be treated in the catch part below
                        // any better idea is welcome
                        if (log.isBasic()) {
                            logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped"));
                        }
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }
                    // process a ROW element
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing"));
                    }
                    Element row = path.getCurrent();
                    try {
                        // Pass over the row instead of just the document. If
                        // if there's only one row, there's no need to
                        // go back to the whole document.
                        processStreaming(row);
                    } catch (Exception e) {
                        // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing"));
                    }
                }
            });
        }
        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl && KettleVFS.startsWithScheme(StringXML)) {
            // read from a VFS-resolvable URL
            data.document = reader.read(KettleVFS.getInputStream(StringXML));
        } else if (readurl) {
            // read url as source
            HttpClient client = HttpClientManager.getInstance().createDefaultClient();
            HttpGet method = new HttpGet(StringXML);
            method.addHeader("Accept-Encoding", "gzip");
            HttpResponse response = client.execute(method);
            Header contentEncoding = response.getFirstHeader("Content-Encoding");
            HttpEntity responseEntity = response.getEntity();
            if (responseEntity != null) {
                if (contentEncoding != null) {
                    // NOTE(review): when Content-Encoding is present but not gzip,
                    // no document is read in this branch — confirm this is intended.
                    String acceptEncodingValue = contentEncoding.getValue();
                    if (acceptEncodingValue.contains("gzip")) {
                        GZIPInputStream in = new GZIPInputStream(responseEntity.getContent());
                        data.document = reader.read(in);
                    }
                } else {
                    data.document = reader.read(responseEntity.getContent());
                }
            }
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Utils.isEmpty(meta.getEncoding())) {
                encoding = meta.getEncoding();
            }
            InputStream is = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(is, encoding);
            } finally {
                BaseStep.closeQuietly(is);
            }
        }
        if (meta.isNamespaceAware()) {
            prepareNSMap(data.document.getRootElement());
        }
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}