Example usage for org.dom4j ElementHandler ElementHandler

List of usage examples for org.dom4j ElementHandler ElementHandler

Introduction

On this page you can find usage examples for the org.dom4j ElementHandler interface.

Prototype

ElementHandler

Source Link

Usage

From source file:musite.taxonomy.UniprotTaxonomyXMLReader.java

License:Open Source License

/**
 * Parses a taxonomy RDF/XML stream into a {@link TaxonomyTree}.
 *
 * <p>dom4j element handlers are registered for {@code /RDF/Description} and
 * its child elements. The child handlers fill a single scratch node
 * ({@code currentNode}); when the enclosing {@code Description} element ends,
 * the scratch node's members are copied into the matching tree node (created
 * if absent), the node is linked into its parents' child lists, and the XML
 * element is detached so the in-memory document does not keep growing.
 *
 * @param is the stream to parse; must not be {@code null}. This method does
 *           not close it.
 * @return the populated tree, after {@code searchRoot()} has been called on it
 * @throws IllegalArgumentException if {@code is} is {@code null}
 * @throws IOException if dom4j fails to parse the document; the underlying
 *                     {@code DocumentException} is attached as the cause
 */
public TaxonomyTree read(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException();
    }
    final TaxonomyTree tree = new TaxonomyTree();
    SAXReader saxReader = new SAXReader();
    // Scratch node reused for every Description entry; cleared in onStart.
    final TaxonomyNode currentNode = new TaxonomyNode();

    // entry
    saxReader.addHandler("/RDF/Description", new ElementHandler() {
        @Override
        public void onStart(ElementPath path) {
            currentNode.clearMembers();
            Element element = path.getCurrent();
            Attribute attribute = element.attribute("about");
            // NOTE(review): replaceAll treats ID_ADDRESS as a regular expression;
            // confirm the constant is meant to be a pattern and not a literal prefix.
            String taxonomyId = attribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, "");
            currentNode.setIdentifier(taxonomyId);
        }

        @Override
        public void onEnd(ElementPath path) {
            // Reuse the node if a subClassOf reference already created a
            // placeholder for this identifier; otherwise register a new one.
            TaxonomyNode node = tree.getTaxonomyNode(currentNode.getIdentifier());
            if (node == null) {
                node = new TaxonomyNode();
                currentNode.copyMembersTo(node);
                tree.addtoNodelist(node);
            } else {
                currentNode.copyMembersTo(node);
            }

            // The parents were recorded against the scratch node; make them
            // point at the real tree node instead.
            ArrayList<TaxonomyNode> parentlist = node.getParents();
            for (int i = 0; i < parentlist.size(); i++) {
                TaxonomyNode parent = parentlist.get(i);
                parent.getChildren().add(node);
            }

            // prune the tree
            path.getCurrent().detach();
        }
    });

    // type
    saxReader.addHandler("/RDF/Description/type", new ElementHandler() {
        @Override
        public void onStart(ElementPath path) {
            // do nothing
        }

        @Override
        public void onEnd(ElementPath path) {
            Attribute typeAttribute = path.getCurrent().attribute("resource");
            String type = typeAttribute.getValue().replaceAll(UniprotTaxonomySettings.TYPE_ADDRESS, "");
            currentNode.setType(type);
        }
    });

    // rank
    saxReader.addHandler("/RDF/Description/rank", new ElementHandler() {
        @Override
        public void onStart(ElementPath path) {
            // do nothing
        }

        @Override
        public void onEnd(ElementPath path) {
            Attribute rankAttribute = path.getCurrent().attribute("resource");
            String rank = rankAttribute.getValue().replaceAll(UniprotTaxonomySettings.RANK_ADDRESS, "");
            currentNode.setRank(rank);
        }
    });

    // scientificName
    saxReader.addHandler("/RDF/Description/scientificName", new ElementHandler() {
        @Override
        public void onStart(ElementPath path) {
            // do nothing
        }

        @Override
        public void onEnd(ElementPath path) {
            currentNode.setScientificName(path.getCurrent().getText());
        }
    });

    // otherName
    saxReader.addHandler("/RDF/Description/otherName", new ElementHandler() {
        @Override
        public void onStart(ElementPath path) {
            // do nothing
        }

        @Override
        public void onEnd(ElementPath path) {
            currentNode.addOthernames(path.getCurrent().getText());
        }
    });

    // partOfLineage
    saxReader.addHandler("/RDF/Description/partOfLineage", new ElementHandler() {
        @Override
        public void onStart(ElementPath path) {
            // do nothing
        }

        @Override
        public void onEnd(ElementPath path) {
            // Only the exact text "true" counts as true.
            String text = path.getCurrent().getText();
            currentNode.setPartOfLineage(text.equals("true"));
        }
    });

    // Add parent
    saxReader.addHandler("/RDF/Description/subClassOf", new ElementHandler() {
        @Override
        public void onStart(ElementPath path) {
            // do nothing
        }

        @Override
        public void onEnd(ElementPath path) {
            Attribute subclassAttribute = path.getCurrent().attribute("resource");
            String subclassId = subclassAttribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, "");
            TaxonomyNode parent = tree.getTaxonomyNode(subclassId);
            if (parent == null) {
                // Parent not seen yet: register a placeholder carrying only the
                // identifier; it is filled in when its own Description ends.
                parent = new TaxonomyNode();
                parent.setIdentifier(subclassId);
                tree.addtoNodelist(parent);
            }
            currentNode.addParentOnly(parent);
        }
    });

    BufferedInputStream bis = new BufferedInputStream(is);

    try {
        // All work happens in the handlers; the returned document is not needed.
        saxReader.read(bis);
    } catch (DocumentException e) {
        // Keep the parser exception as the cause so the stack trace is not lost.
        throw new IOException(e.getMessage(), e);
    }

    tree.searchRoot();
    return tree;
}

From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java

License:Apache License

/**
 * Parses an XML source into {@code data.document}, optionally in streaming
 * ("pruning") mode.
 *
 * <p>The source is chosen as follows:
 * <ul>
 *   <li>{@code IsInXMLField}: {@code StringXML} holds the XML text itself;</li>
 *   <li>{@code readurl} and {@code StringXML} starts with a VFS scheme: it is
 *       opened through {@code KettleVFS};</li>
 *   <li>{@code readurl} otherwise: {@code StringXML} is fetched over HTTP
 *       (gzip-encoded responses are decompressed);</li>
 *   <li>otherwise {@code file} is read using the step's configured encoding,
 *       defaulting to UTF-8.</li>
 * </ul>
 *
 * <p>When {@code data.prunePath} is set, a handler on that path passes every
 * matching element to {@code processStreaming} and then detaches it. If the
 * step has been stopped, the handler detaches the whole document to abort the
 * parse; the resulting exception is swallowed below and {@code false} is
 * returned.
 *
 * @param StringXML    XML text, or a URL, depending on the flags
 * @param file         file to read when neither flag applies
 * @param IsInXMLField {@code true} if {@code StringXML} is the XML content
 * @param readurl      {@code true} if {@code StringXML} is a URL
 * @return {@code true} when parsing completed; {@code false} when an exception
 *         occurred after streaming had been stopped
 * @throws KettleException wrapping any other exception raised while parsing
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {

    this.prevRow = buildEmptyRow(); // pre-allocate previous row

    try {
        SAXReader reader = XMLParserFactoryProducer.getSAXReader(null);
        data.stopPruning = false;
        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        } else {
            // Ignore DTD declarations
            reader.setEntityResolver(new IgnoreDTDEntityResolver());
        }

        // Ignore comments?
        if (meta.isIgnoreComments()) {
            reader.setIgnoreComments(true);
        }

        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all is processed in the handler
            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated"));
            }
            if (data.PathValue.equals(data.prunePath)) {
                // Edge case, but if true, there will only ever be one item in the list
                data.an = new ArrayList<>(1); // pre-allocate array and sizes
                data.an.add(null);
            }
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }

                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // When a large file is being processed and the step is stopped, the reader
                        // would still read the whole thing. Detaching the document makes the parse
                        // fail (NPE or other errors depending on the parsing location); that failure
                        // is recognised via data.stopPruning in the catch block below.
                        if (log.isBasic()) {
                            logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped"));
                        }
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }

                    // process a ROW element
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing"));
                    }
                    Element row = path.getCurrent();
                    try {
                        // Pass over the row instead of just the document. If
                        // there's only one row, there's no need to
                        // go back to the whole document.
                        processStreaming(row);
                    } catch (Exception e) {
                        // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing"));
                    }
                }
            });
        }

        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl && KettleVFS.startsWithScheme(StringXML)) {
            InputStream vfsStream = KettleVFS.getInputStream(StringXML);
            try {
                data.document = reader.read(vfsStream);
            } finally {
                BaseStep.closeQuietly(vfsStream);
            }
        } else if (readurl) {
            // read url as source
            HttpClient client = HttpClientManager.getInstance().createDefaultClient();
            HttpGet method = new HttpGet(StringXML);
            method.addHeader("Accept-Encoding", "gzip");
            HttpResponse response = client.execute(method);
            Header contentEncoding = response.getFirstHeader("Content-Encoding");
            HttpEntity responseEntity = response.getEntity();
            if (responseEntity != null) {
                String contentEncodingValue = contentEncoding == null ? null : contentEncoding.getValue();
                boolean gzipped = contentEncodingValue != null && contentEncodingValue.contains("gzip");
                // Any non-gzip response (no header, or e.g. "identity") is parsed as-is;
                // previously such a response was silently skipped and data.document was
                // left holding whatever it had before.
                InputStream in = responseEntity.getContent();
                try {
                    if (gzipped) {
                        in = new GZIPInputStream(in);
                    }
                    data.document = reader.read(in);
                } finally {
                    // Closing the outermost stream also closes the entity content.
                    BaseStep.closeQuietly(in);
                }
            }
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Utils.isEmpty(meta.getEncoding())) {
                encoding = meta.getEncoding();
            }
            InputStream is = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(is, encoding);
            } finally {
                BaseStep.closeQuietly(is);
            }
        }

        if (meta.isNamespaceAware()) {
            prepareNSMap(data.document.getRootElement());
        }
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}

From source file:org.snipsnap.snip.XMLSnipImport.java

License:Open Source License

/**
 * Load snips and users into the SnipSpace from an xml document out of a stream.
 *
 * <p>{@code /snipspace/user} and {@code /snipspace/snip} elements are handled
 * as they are parsed and then detached to save memory. A failure while
 * importing a single record is logged and the import continues with the next
 * one. Control characters below 0x20 other than tab, line feed and carriage
 * return are replaced by a space before the parser sees them.
 *
 * @param in    the input stream to load from, decoded as UTF-8
 * @param flags bit mask tested against {@code IMPORT_USERS} and
 *              {@code IMPORT_SNIPS} to select what is imported; also passed
 *              on to {@code loadUser} / {@code loadSnip}
 * @throws IOException if the document cannot be parsed (the parser exception
 *                     is attached as the cause) or the stream cannot be read
 */
public static void load(InputStream in, final int flags) throws IOException {
    SAXReader saxReader = new SAXReader();
    try {
        saxReader.addHandler("/snipspace/user", new ElementHandler() {
            public void onStart(ElementPath elementPath) {
                // nothing to do here ...
            }

            public void onEnd(ElementPath elementPath) {
                Element userElement = elementPath.getCurrent();
                if ((flags & IMPORT_USERS) != 0) {
                    try {
                        XMLSnipImport.loadUser(userElement, flags);
                    } catch (Exception e) {
                        // Best effort: keep importing, but record why this user failed.
                        Logger.fatal("XMLSnipImport: error importing user: " + userElement.elementText("name")
                                + ": " + e);
                    }
                    getStatus().inc();
                }
                // prune the element to save memory
                userElement.detach();
            }
        });

        saxReader.addHandler("/snipspace/snip", new ElementHandler() {
            public void onStart(ElementPath elementPath) {
                // nothing to do here ...
            }

            public void onEnd(ElementPath elementPath) {
                Element snipElement = elementPath.getCurrent();
                if ((flags & IMPORT_SNIPS) != 0) {
                    try {
                        XMLSnipImport.loadSnip(snipElement, flags);
                    } catch (Exception e) {
                        // Best effort: keep importing, but record why this snip failed.
                        Logger.fatal("XMLSnipImport: error importing snip: " + snipElement.elementText("name")
                                + ": " + e);
                    }
                    getStatus().inc();
                }
                // prune the element to save memory
                snipElement.detach();
            }
        });

        // add a reader wrapper to remove illegal characters from input stream
        // it looks like the database export (XMLWriter) allows these to get through
        InputStreamReader reader = new InputStreamReader(in, "UTF-8") {
            public int read(char[] chars) throws IOException {
                int n = super.read(chars);
                for (int i = 0; i < n; i++) {
                    chars[i] = replaceIfIllegal(chars[i]);
                }
                return n;
            }

            public int read(char[] chars, int start, int length) throws IOException {
                int n = super.read(chars, start, length);
                // The characters just read occupy chars[start .. start + n - 1];
                // n is -1 at end of stream, in which case the loop does not run.
                for (int i = start; i < start + n; i++) {
                    chars[i] = replaceIfIllegal(chars[i]);
                }
                return n;
            }

            private char replaceIfIllegal(char c) {
                if (c < 0x20 && !(c == 0x09 || c == 0x0a || c == 0x0d)) {
                    charErrCount++;
                    return (char) 0x20;
                }
                return c;
            }
        };

        saxReader.read(reader);
        Logger.warn("XMLSnipImport: corrected " + charErrCount + " characters in input");
        Logger.log("XMLSnipImport: imported " + getStatus().getValue() + " data records");
    } catch (DocumentException e) {
        Logger.warn("XMLSnipImport: unable to parse document", e);
        IOException ioe = new IOException("Error parsing document: " + e);
        // Attach the parser exception so callers can inspect the root cause.
        ioe.initCause(e);
        throw ioe;
    }
}

From source file:uidserver.Config.java

/**
 * Loads the server configuration from an XML file.
 *
 * <p>The children of the {@code /config} element are matched
 * case-insensitively by name: {@code logpath}, {@code port}, {@code timeout}
 * and {@code uidfile} set the corresponding fields; other elements are
 * ignored. After parsing, {@code logPath}, {@code port} and {@code uidFile}
 * must all be set, and the log directory is created if it does not exist.
 *
 * <p>Every failure (missing file, unparsable XML, empty {@code /config},
 * non-numeric timeout, missing mandatory setting, log directory that cannot
 * be created) prints a message to standard output and terminates the JVM.
 *
 * @param configFilePath path of the XML configuration file
 */
public Config(String configFilePath) {
    // NOTE(review): all error paths exit with status 0, which scripts cannot tell
    // apart from success; kept as-is for compatibility, consider a non-zero status.
    File configFile = new File(configFilePath);
    if (configFile.exists()) {
        try {
            SAXReader reader = new SAXReader();
            reader.addHandler("/config", new ElementHandler() {
                @Override
                public void onStart(ElementPath elementPath) {
                    // nothing to do until the element is complete
                }

                @Override
                public void onEnd(ElementPath elementPath) {
                    Element row = elementPath.getCurrent();
                    readElement(row);
                    row.detach();
                }

                /** Copies the recognised child elements of {@code row} into the config fields. */
                private void readElement(Element row) {
                    List<Element> nodes = row.elements();
                    if (!nodes.isEmpty()) {
                        for (Element node : nodes) {
                            // Locale.ROOT keeps the match independent of the default
                            // locale (e.g. Turkish dotless-i lowercasing).
                            String name = node.getName().toLowerCase(java.util.Locale.ROOT);
                            String value = node.getText();
                            switch (name) {
                            case "logpath":
                                logPath = new File(value);
                                break;
                            case "port":
                                port = value;
                                break;
                            case "timeout":
                                try {
                                    timeOut = Integer.valueOf(value.trim());
                                } catch (NumberFormatException ex) {
                                    // Report bad input the same way as the other
                                    // configuration errors instead of crashing.
                                    System.out.println("Error: invalid timeout value in config file: " + value);
                                    System.exit(0);
                                }
                                break;
                            case "uidfile":
                                uidFile = new File(value);
                                break;
                            }

                        }

                    } else {
                        System.out.println("Error: empty elements in config file, please add correct setup");
                        System.exit(0);
                    }

                }

            });

            reader.setValidation(false);
            // The handler above does all the work; the parsed document is not needed.
            reader.read(configFile);
            if (logPath != null && port != null && uidFile != null) {
                if (!logPath.exists()) {
                    if (!logPath.mkdirs()) {
                        System.out.println("Failed to create log file: " + logPath.getAbsoluteFile());
                        System.out.println("Please setup correct log file path");
                        System.exit(0);
                    }
                }

            } else {
                System.out.println("Please set up correct Port/LogFile/UidFile");
                System.exit(0);
            }
        } catch (DocumentException ex) {
            Logger.getLogger(Config.class.getName()).log(Level.SEVERE, null, ex);
            System.out.println("Error during reading xml config file, please double check file content");
            System.exit(0);
        }
    } else {
        System.out.println("The specified config file: " + configFile.getAbsolutePath() + " doesn't exist");
        System.out.println("Please key in correct config file path");
        System.exit(0);
    }
}