Example usage for org.dom4j ElementPath getCurrent

List of usage examples for org.dom4j ElementPath getCurrent

Introduction

On this page you can find example usages of org.dom4j.ElementPath.getCurrent().

Prototype

Element getCurrent();

Source Link

Document

Returns the current element of the path, i.e. the Element the handler is being notified about.

Usage

From source file:de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer.java

License:Apache License

/**
 * Handles the start of an element: cleans its attributes and, for the elements
 * {@code LexicalResource}, {@code Lexicon} and {@code GlobalInformation},
 * creates the corresponding object and saves it.
 *
 * @param epath path whose current element has just been opened
 */
@Override
public void onStart(ElementPath epath) {
    final Element current = epath.getCurrent();
    final String tagName = current.getName();

    // Drop attributes whose value is the literal "NULL"; every other value is
    // passed through StringUtils.replaceNonUtf8.
    for (Iterator<?> attributes = current.attributeIterator(); attributes.hasNext();) {
        final Attribute attribute = (Attribute) attributes.next();
        if (!"NULL".equals(attribute.getStringValue())) {
            attribute.setValue(StringUtils.replaceNonUtf8(attribute.getValue()));
        } else {
            attributes.remove();
        }
    }

    if ("LexicalResource".equals(tagName)) {
        if (lexicalResource != null) {
            // A lexical resource already exists: flag it instead of creating one.
            externalLexicalResource = true;
        } else {
            lexicalResource = new LexicalResource();
            lexicalResource.setName(current.attributeValue("name"));
            lexicalResource.setDtdVersion(current.attributeValue("dtdVersion"));
            session.save(lexicalResource);
        }
        return;
    }

    if ("Lexicon".equals(tagName)) {
        // Start a fresh, empty lexicon and attach it to the lexical resource.
        lexicon = new Lexicon();
        lexicon.setId(current.attributeValue("id"));
        lexicon.setName(current.attributeValue("name"));
        lexicon.setLanguageIdentifier(current.attributeValue("languageIdentifier"));
        lexicalResource.addLexicon(lexicon);
        saveCascade(lexicon, lexicalResource);
        return;
    }

    // Global information is only stored when the lexical resource was created here.
    if ("GlobalInformation".equals(tagName) && !externalLexicalResource) {
        final GlobalInformation globalInformation = new GlobalInformation();
        globalInformation.setLabel(current.attributeValue("label"));
        lexicalResource.setGlobalInformation(globalInformation);
        saveCascade(globalInformation, lexicalResource);
        commit();
        // The reference is cleared again once the commit has happened.
        lexicalResource.setGlobalInformation(null);
    }
}

From source file:de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer.java

License:Apache License

/**
 * Handles the end of an element. When the element name matches one of the
 * known children of Lexicon or LexicalResource, the element is converted with
 * {@code fromXmlToObject}, passed to {@code saveListElement} together with its
 * owner and the owner's list, and finally detached from the XML tree.
 *
 * @param epath path whose current element has just been closed
 */
@Override
public void onEnd(ElementPath epath) {
    final Element current = epath.getCurrent();
    final String tagName = current.getName();
    Object converted = null;

    if ("LexicalEntry".equals(tagName)) {
        // ---- direct children of Lexicon ----
        converted = fromXmlToObject(current, LexicalEntry.class);
        saveListElement(lexicon, lexicon.getLexicalEntries(), converted);
    } else if ("SemanticPredicate".equals(tagName)) {
        converted = fromXmlToObject(current, SemanticPredicate.class);
        saveListElement(lexicon, lexicon.getSemanticPredicates(), converted);
    } else if ("SubcategorizationFrame".equals(tagName)) {
        converted = fromXmlToObject(current, SubcategorizationFrame.class);
        saveListElement(lexicon, lexicon.getSubcategorizationFrames(), converted);
    } else if ("SubcategorizationFrameSet".equals(tagName)) {
        converted = fromXmlToObject(current, SubcategorizationFrameSet.class);
        saveListElement(lexicon, lexicon.getSubcategorizationFrameSets(), converted);
    } else if ("SynSemCorrespondence".equals(tagName)) {
        converted = fromXmlToObject(current, SynSemCorrespondence.class);
        saveListElement(lexicon, lexicon.getSynSemCorrespondences(), converted);
    } else if ("Synset".equals(tagName)) {
        converted = fromXmlToObject(current, Synset.class);
        saveListElement(lexicon, lexicon.getSynsets(), converted);
    } else if ("ConstraintSet".equals(tagName)) {
        converted = fromXmlToObject(current, ConstraintSet.class);
        saveListElement(lexicon, lexicon.getConstraintSets(), converted);
    } else if ("SenseAxis".equals(tagName)) {
        // ---- direct children of LexicalResource ----
        converted = fromXmlToObject(current, SenseAxis.class);
        saveListElement(lexicalResource, lexicalResource.getSenseAxes(), converted);
    } else if ("PredicateArgumentAxis".equals(tagName)) {
        converted = fromXmlToObject(current, PredicateArgumentAxis.class);
        saveListElement(lexicalResource, lexicalResource.getPredicateArgumentAxes(), converted);
    } else if ("MetaData".equals(tagName)) {
        converted = fromXmlToObject(current, MetaData.class);
        saveListElement(lexicalResource, lexicalResource.getMetaData(), converted);
    }

    // Once an instance has been saved, its XML element is no longer needed.
    if (converted != null) {
        current.detach();
    }
}

From source file:galign.helpers.tmx.TmxFile.java

License:Apache License

/**
 * Reads and validates a TMX XML string.
 *
 * <p>Three dom4j element handlers are registered on the given reader before the
 * input is parsed:
 * <ul>
 *   <li>{@code /tmx} - stores the {@code version} attribute in {@code m_tmxVersion};</li>
 *   <li>{@code /tmx/header} - builds {@code m_header} and detaches the element;</li>
 *   <li>{@code /tmx/body/tu} - passes a new {@code Tu} to {@code addTu} and
 *       detaches the element.</li>
 * </ul>
 *
 * @param p_reader reader the handlers are registered on and that parses the input
 * @param p_input  source of the TMX document
 * @throws org.dom4j.DocumentException if the reader fails to read the input
 */
protected void init(SAXReader p_reader, InputSource p_input) throws org.dom4j.DocumentException {
    // Record the TMX version as soon as the root element is opened, so that it
    // is available when the header handler below runs.
    p_reader.addHandler("/tmx", new ElementHandler() {
        public final void onStart(ElementPath path) {
            m_tmxVersion = path.getCurrent().attributeValue("version");
        }

        public final void onEnd(ElementPath path) {
            // nothing to do
        }
    });

    // Handle the header when it is complete, then prune it to conserve memory.
    p_reader.addHandler("/tmx/header", new ElementHandler() {
        public final void onStart(ElementPath path) {
            // nothing to do
        }

        public final void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            m_header = new TmxHeader(element);
            m_header.setTmxVersion(m_tmxVersion);

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // Handle each translation unit when it is complete, then prune it to
    // conserve memory.
    p_reader.addHandler("/tmx/body/tu", new ElementHandler() {
        public final void onStart(ElementPath path) {
            // nothing to do
        }

        public final void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            addTu(new Tu(element));

            // prune the current element to reduce memory
            element.detach();
        }
    });

    // The handlers above do all the work; the returned Document is not used.
    p_reader.read(p_input);
}

From source file:musite.io.xml.UniProtXMLReader.java

License:Open Source License

/**
 * Parses UniProt XML from the given stream and adds one protein per accepted
 * {@code /uniprot/entry} element to the result.
 *
 * <p>An entry is accepted when a scientific organism name, an accession and a
 * sequence were found and the organism passes {@code organismFilter} (if one is
 * set). Features whose type is in {@code UNIPROT_TYPES} are turned into PTM
 * site annotations, subject to {@code ptmFilter} and the
 * {@code includeBySimilarity} / {@code includeProbable} /
 * {@code includePotential} switches. Every entry element is detached once it
 * has been handled.
 *
 * @param is stream containing the UniProt XML; it is wrapped in a
 *           {@code BufferedInputStream} and is not closed by this method
 * @return {@code data} if it is non-null, otherwise a new {@code ProteinsImpl},
 *         with the accepted proteins added
 * @throws IllegalArgumentException if {@code is} is {@code null}
 * @throws IOException if dom4j fails to read the document; the original
 *                     {@code DocumentException} is attached as the cause
 */
public Proteins read(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException("Input stream must not be null");
    }

    final Proteins result = data == null ? new ProteinsImpl() : data;

    SAXReader saxReader = new SAXReader();

    // Scratch state for the entry currently being parsed; reset in the
    // /uniprot/entry onStart handler and filled by the child handlers below.
    final StringBuilder acc = new StringBuilder(30);
    final StringBuilder name = new StringBuilder(30);
    final StringBuilder fullName = new StringBuilder(200);
    final StringBuilder org = new StringBuilder(30);
    final StringBuilder seq = new StringBuilder(2000);
    // Each inner list holds, in order: location, ptm, enzyme, annotation map.
    final List<List<Object>> sites = new ArrayList<List<Object>>(4);
    // Accessions after the first one of the current entry.
    final Set<String> accs = new HashSet<String>();

    // entry
    saxReader.addHandler("/uniprot/entry", new ElementHandler() {
        public void onStart(ElementPath path) {
            acc.setLength(0);
            fullName.setLength(0);
            seq.setLength(0);
            org.setLength(0);
            name.setLength(0);
            sites.clear();
            accs.clear();
        }

        public void onEnd(ElementPath path) {
            // process an element
            if (org.length() > 0 && (organismFilter == null || organismFilter.contains(org.toString()))
                    && acc.length() > 0 && seq.length() > 0) {
                String accession = acc.toString();
                String sequence = seq.toString();

                ProteinImpl protein = new ProteinImpl(accession, sequence,
                        name.length() == 0 ? null : name.toString(),
                        fullName.length() == 0 ? null : fullName.toString(),
                        org.length() == 0 ? null : org.toString());
                result.addProtein(protein);

                for (List<Object> l : sites) {
                    Integer site = (Integer) l.get(0);
                    PTM ptm = (PTM) l.get(1);
                    String enzyme = (String) l.get(2);
                    // "autocatalysis" is replaced by the entry's own name.
                    if (enzyme != null && enzyme.equalsIgnoreCase("autocatalysis")) {
                        enzyme = name.toString();
                    }

                    Map ann = (Map) l.get(3);
                    try {
                        PTMAnnotationUtil.annotate(protein, site, ptm, enzyme, ann);
                    } catch (Exception e) {
                        // Best effort: a site that cannot be annotated is reported
                        // and skipped; the remaining sites are still processed.
                        e.printStackTrace();
                    }
                }

                if (keepAllIds) {
                    for (String ac : accs) {
                        mapIdMainId.put(ac, accession);
                    }
                    if (!accs.isEmpty()) {
                        // Copy: accs is cleared when the next entry starts.
                        protein.putInfo("other-accessions", new HashSet<String>(accs));
                    }
                }
            }

            // prune the tree
            Element row = path.getCurrent();
            row.detach();
        }
    });

    // accession: the first one becomes the main accession, later ones are
    // collected only when keepAllIds is set
    saxReader.addHandler("/uniprot/entry/accession", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (acc.length() == 0) {
                acc.append(path.getCurrent().getText());
            } else if (keepAllIds) {
                accs.add(path.getCurrent().getText());
            }
        }
    });

    // name (first occurrence only)
    saxReader.addHandler("/uniprot/entry/name", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (name.length() > 0) {
                return;
            }

            Element el = path.getCurrent();
            name.append(el.getText());
        }
    });

    // full name (first occurrence only)
    saxReader.addHandler("/uniprot/entry/protein/recommendedName/fullName", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (fullName.length() > 0) {
                return;
            }

            Element el = path.getCurrent();
            fullName.append(el.getTextTrim());
        }
    });

    // organism: only the first name with type="scientific" is used
    saxReader.addHandler("/uniprot/entry/organism/name", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (org.length() > 0) {
                return;
            }

            Element el = path.getCurrent();
            String attr = el.attributeValue("type");
            if (attr == null || !attr.equalsIgnoreCase("scientific")) {
                return;
            }

            org.append(el.getText());
        }
    });

    // sequence (first occurrence only), with all whitespace removed
    saxReader.addHandler("/uniprot/entry/sequence", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (seq.length() > 0) {
                return;
            }

            Element el = path.getCurrent();
            seq.append(el.getText().replaceAll("\\p{Space}", ""));
        }
    });

    // feature: collects PTM sites for the current entry
    saxReader.addHandler("/uniprot/entry/feature", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            Element el = path.getCurrent();
            String type = el.attributeValue("type");
            if (type == null) {
                return;
            }

            PTM ptm = null;
            String enzyme = null;
            String description = null;
            String keyword = null;

            if (UNIPROT_TYPES.contains(type.toLowerCase())) {
                description = el.attributeValue("description");
                if (description == null) {
                    return;
                }

                // The description is a "; "-separated list: a part recognised by
                // PTM.ofKeyword names the modification, a part starting with
                // "by " names the enzyme.
                String[] descs = description.split("; ");
                for (String desc : descs) {
                    PTM tmp = PTM.ofKeyword(desc);
                    if (tmp != null) {
                        ptm = tmp;
                        keyword = desc;
                    } else if (desc.startsWith("by ")) {
                        enzyme = desc.substring(3);
                    }
                }
            }

            if (ptm == null || (ptmFilter != null && !ptmFilter.contains(ptm))) {
                return;
            }

            String status = el.attributeValue("status");
            if (status != null) {
                if (!includeBySimilarity && status.equalsIgnoreCase("By similarity")) {
                    return;
                }
                if (!includeProbable && status.equalsIgnoreCase("Probable")) {
                    return;
                }
                if (!includePotential && status.equalsIgnoreCase("Potential")) {
                    return;
                }
            }

            // -1 means "no usable position found"; if several positions parse,
            // the last one wins.
            int site = -1;

            List<Element> locs = el.elements("location");
            for (Element loc : locs) {
                List<Element> poss = loc.elements("position");
                for (Element pos : poss) {
                    String str = pos.attributeValue("position");
                    if (str == null) {
                        continue;
                    }

                    try {
                        site = Integer.parseInt(str) - 1; //start from 0
                    } catch (NumberFormatException ignored) {
                        // not a number: keep whatever position was found so far
                    }
                }
            }

            if (site != -1) {
                List<Object> l = new ArrayList<Object>();
                l.add(site);
                l.add(ptm);
                l.add(enzyme);
                Map<String, Object> m = new HashMap<String, Object>();
                if (keyword != null) {
                    m.put("keyword", keyword);
                }
                if (description != null) {
                    m.put("description", description);
                }
                if (status != null) {
                    m.put("status", status);
                }
                l.add(m);
                sites.add(l);
            }
        }
    });

    BufferedInputStream bis = new BufferedInputStream(is);

    try {
        saxReader.read(bis);
    } catch (DocumentException e) {
        // Keep the original exception as the cause so the parse location and
        // stack trace are not lost.
        throw new IOException(e.getMessage(), e);
    }

    return result;
}

From source file:musite.taxonomy.UniprotTaxonomyXMLReader.java

License:Open Source License

/**
 * Parses UniProt taxonomy RDF/XML from the given stream into a
 * {@link TaxonomyTree}.
 *
 * <p>Each {@code /RDF/Description} element is collected into a scratch node by
 * the child-element handlers; when the element ends, its members are copied
 * into the tree's node for that identifier (created if missing) and the node is
 * registered as a child of each of its parents. The element is then detached.
 * After parsing, {@code tree.searchRoot()} is called.
 *
 * @param is stream containing the taxonomy XML; it is wrapped in a
 *           {@code BufferedInputStream} and is not closed by this method
 * @return the populated taxonomy tree
 * @throws IllegalArgumentException if {@code is} is {@code null}
 * @throws IOException if dom4j fails to read the document; the original
 *                     {@code DocumentException} is attached as the cause
 */
public TaxonomyTree read(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException("Input stream must not be null");
    }
    final TaxonomyTree tree = new TaxonomyTree();
    SAXReader saxReader = new SAXReader();
    // Scratch node reused for every Description element.
    final TaxonomyNode currentNode = new TaxonomyNode();

    // entry
    saxReader.addHandler("/RDF/Description", new ElementHandler() {
        public void onStart(ElementPath path) {
            currentNode.clearMembers();
            Element element = path.getCurrent();
            Attribute attribute = element.attribute("about");
            // NOTE(review): replaceAll treats ID_ADDRESS as a regular expression;
            // if it is meant as a literal prefix, String.replace would be the
            // safer call - confirm against UniprotTaxonomySettings.
            String taxonomyId = attribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, "");
            currentNode.setIdentifier(taxonomyId);
        }

        public void onEnd(ElementPath path) {
            // Reuse the node already in the tree (it may have been created as a
            // parent placeholder by the subClassOf handler), or add a new one.
            TaxonomyNode node = tree.getTaxonomyNode(currentNode.getIdentifier());
            if (node == null) {
                node = new TaxonomyNode();
                currentNode.copyMembersTo(node);
                tree.addtoNodelist(node);
            } else {
                currentNode.copyMembersTo(node);
            }

            // The parents were attached to the scratch node; register the real
            // node as their child.
            for (TaxonomyNode parent : node.getParents()) {
                parent.getChildren().add(node);
            }

            // prune the tree
            path.getCurrent().detach();
        }
    });

    // type
    saxReader.addHandler("/RDF/Description/type", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            Attribute typeAttribute = path.getCurrent().attribute("resource");
            String type = typeAttribute.getValue().replaceAll(UniprotTaxonomySettings.TYPE_ADDRESS, "");
            currentNode.setType(type);
        }
    });

    // rank
    saxReader.addHandler("/RDF/Description/rank", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            Attribute rankAttribute = path.getCurrent().attribute("resource");
            String rank = rankAttribute.getValue().replaceAll(UniprotTaxonomySettings.RANK_ADDRESS, "");
            currentNode.setRank(rank);
        }
    });

    // scientificName
    saxReader.addHandler("/RDF/Description/scientificName", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            currentNode.setScientificName(path.getCurrent().getText());
        }
    });

    // otherName
    saxReader.addHandler("/RDF/Description/otherName", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            currentNode.addOthernames(path.getCurrent().getText());
        }
    });

    // partOfLineage
    saxReader.addHandler("/RDF/Description/partOfLineage", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            // Only the exact text "true" counts as true.
            currentNode.setPartOfLineage("true".equals(path.getCurrent().getText()));
        }
    });

    // Add parent
    saxReader.addHandler("/RDF/Description/subClassOf", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            Attribute subclassAttribute = path.getCurrent().attribute("resource");
            String parentId = subclassAttribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, "");
            TaxonomyNode parent = tree.getTaxonomyNode(parentId);
            if (parent == null) {
                // Parent not in the tree yet: add a placeholder carrying only
                // the identifier.
                parent = new TaxonomyNode();
                parent.setIdentifier(parentId);
                tree.addtoNodelist(parent);
            }
            currentNode.addParentOnly(parent);
        }
    });

    BufferedInputStream bis = new BufferedInputStream(is);

    try {
        // The handlers above build the tree; the returned Document is not used.
        saxReader.read(bis);
    } catch (DocumentException e) {
        // Keep the original exception as the cause so the parse location and
        // stack trace are not lost.
        throw new IOException(e.getMessage(), e);
    }

    tree.searchRoot();
    return tree;
}

From source file:org.dom4j.samples.LargeDocumentDemo.java

License:Open Source License

/**
 * Prints a message naming the element whose parsing has just started.
 *
 * @param path path whose current element is reported
 */
public void onStart(ElementPath path) {
    println("onStart: of parsing element: " + path.getCurrent());
}

From source file:org.dom4j.samples.LargeDocumentDemo.java

License:Open Source License

/**
 * Prints a message with the finished element and its number of content nodes,
 * then detaches the element from the tree.
 *
 * @param path path whose current element has just been completed
 */
public void onEnd(ElementPath path) {
    final Element finished = path.getCurrent();

    final String message = "onEnd: of parsing element: " + finished + " with: " + finished.content().size()
            + " content node(s)";
    println(message);

    // Detach the element so the tree does not keep growing while parsing.
    finished.detach();
}

From source file:org.localmatters.serializer.config.SerializationElementHandler.java

License:Apache License

/**
 * Passes every child of the finished element to {@code handleId}, then resolves
 * references and extensions and fails if any identifier could not be resolved.
 *
 * @param elementPath path whose current element has just been completed
 * @throws ConfigurationException if resolving references or extensions
 *                                reported at least one invalid identifier
 * @see org.dom4j.ElementHandler#onEnd(org.dom4j.ElementPath)
 */
@SuppressWarnings("unchecked")
public void onEnd(ElementPath elementPath) {
    final List<Element> children = elementPath.getCurrent().elements();
    for (Element child : children) {
        handleId(child, new HashMap<String, String>(), true);
    }

    // Gather the identifiers reported by both resolution steps.
    final Set<String> unresolved = resolveReferences(getReferences(), getSerializations());
    unresolved.addAll(resolveExtensions(getExtensions(), getComplexWithIds()));

    if (CollectionUtils.isNotEmpty(unresolved)) {
        throw new ConfigurationException(INVALID_ID_FORMAT, unresolved);
    }
}

From source file:org.orbeon.oxf.xml.dom4j.LocationSAXContentHandler.java

License:Open Source License

/**
 * Delegates to the superclass and then, when location data can be created from
 * the locator, stores it as the data of the current element.
 *
 * @throws SAXException if the superclass handler throws it
 */
public void startElement(String namespaceURI, String localName, String qualifiedName, Attributes attributes)
        throws SAXException {
    super.startElement(namespaceURI, localName, qualifiedName, attributes);

    final LocationData location = LocationData.createIfPresent(locator);
    if (location == null) {
        // No location available: leave the element untouched.
        return;
    }
    getElementStack().getCurrent().setData(location);
}

From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java

License:Apache License

/**
 * Configures a SAX reader from the step metadata and reads the XML document
 * into {@code data.document}.
 *
 * <p>The source is chosen in this order: the XML text itself
 * ({@code IsInXMLField}), a VFS URL, an HTTP URL, or the given file. When
 * {@code data.prunePath} is set, a handler processes and detaches each matching
 * element while the document is being read.
 *
 * @param StringXML    XML text, or the URL to read when {@code readurl} is set
 * @param file         file to read when neither {@code IsInXMLField} nor
 *                     {@code readurl} is set
 * @param IsInXMLField whether {@code StringXML} holds the XML text itself
 * @param readurl      whether {@code StringXML} holds a URL
 * @return {@code true} when reading completed; {@code false} when an error
 *         occurred after pruning was stopped on purpose
 * @throws KettleException wrapping any other failure
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {

    this.prevRow = buildEmptyRow(); // pre-allocate previous row

    try {
        SAXReader reader = XMLParserFactoryProducer.getSAXReader(null);
        data.stopPruning = false;
        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        } else {
            // Ignore DTD declarations
            reader.setEntityResolver(new IgnoreDTDEntityResolver());
        }

        // Ignore comments?
        if (meta.isIgnoreComments()) {
            reader.setIgnoreComments(true);
        }

        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all is processed in the handler
            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated"));
            }
            if (data.PathValue.equals(data.prunePath)) {
                // Edge case, but if true, there will only ever be one item in the list
                data.an = new ArrayList<>(1); // pre-allocate array and sizes
                data.an.add(null);
            }
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }

                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // When a large file is being processed and the step is stopped, the reader would
                        // still read the whole thing. Detaching the document makes the reader fail with an
                        // NPE or another error depending on the parsing location; that error is recognised
                        // through data.stopPruning in the catch block below.
                        if (log.isBasic()) {
                            logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped"));
                        }
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }

                    // process a ROW element
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing"));
                    }
                    Element row = path.getCurrent();
                    try {
                        // Pass over the row instead of just the document. If
                        // there's only one row, there's no need to
                        // go back to the whole document.
                        processStreaming(row);
                    } catch (Exception e) {
                        // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing"));
                    }
                }
            });
        }

        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl && KettleVFS.startsWithScheme(StringXML)) {
            // read VFS url as source; close the stream once the document is read
            InputStream vfsStream = KettleVFS.getInputStream(StringXML);
            try {
                data.document = reader.read(vfsStream);
            } finally {
                BaseStep.closeQuietly(vfsStream);
            }
        } else if (readurl) {
            // read url as source
            HttpClient client = HttpClientManager.getInstance().createDefaultClient();
            HttpGet method = new HttpGet(StringXML);
            method.addHeader("Accept-Encoding", "gzip");
            HttpResponse response = client.execute(method);
            Header contentEncoding = response.getFirstHeader("Content-Encoding");
            HttpEntity responseEntity = response.getEntity();
            if (responseEntity != null) {
                String contentEncodingValue = contentEncoding == null ? null : contentEncoding.getValue();
                boolean gzipped = contentEncodingValue != null && contentEncodingValue.contains("gzip");

                InputStream in = responseEntity.getContent();
                try {
                    // Only gzip is unwrapped. Any other (or no) Content-Encoding is read as-is,
                    // so a response is never silently skipped, which would leave
                    // data.document at its previous value.
                    if (gzipped) {
                        in = new GZIPInputStream(in);
                    }
                    data.document = reader.read(in);
                } finally {
                    BaseStep.closeQuietly(in);
                }
            }
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Utils.isEmpty(meta.getEncoding())) {
                encoding = meta.getEncoding();
            }
            InputStream is = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(is, encoding);
            } finally {
                BaseStep.closeQuietly(is);
            }
        }

        if (meta.isNamespaceAware()) {
            prepareNSMap(data.document.getRootElement());
        }
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}