List of usage examples for the org.dom4j.ElementHandler interface
ElementHandler
From source file:musite.taxonomy.UniprotTaxonomyXMLReader.java
License:Open Source License
public TaxonomyTree read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }/*from w w w. ja v a 2s . c om*/ final TaxonomyTree tree = new TaxonomyTree(); SAXReader saxReader = new SAXReader(); final TaxonomyNode currentNode = new TaxonomyNode(); // entry saxReader.addHandler("/RDF/Description", new ElementHandler() { public void onStart(ElementPath path) { currentNode.clearMembers(); Element element = path.getCurrent(); Attribute attribute = element.attribute("about"); String TaxonomyID = attribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, ""); currentNode.setIdentifier(TaxonomyID); } public void onEnd(ElementPath path) { // process an element //create a new node TaxonomyNode node = tree.getTaxonomyNode(currentNode.getIdentifier()); if (node == null) { node = new TaxonomyNode(); currentNode.copyMembersTo(node); tree.addtoNodelist(node); } else { currentNode.copyMembersTo(node); } //change the parent from currentNode to node ArrayList<TaxonomyNode> parentlist = node.getParents(); for (int i = 0; i < parentlist.size(); i++) { TaxonomyNode parent = parentlist.get(i); parent.getChildren().add(node); } // prune the tree Element row = path.getCurrent(); row.detach(); } }); // type saxReader.addHandler("/RDF/Description/type", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element typeElement = (Element) path.getCurrent(); Attribute typeAttribute = typeElement.attribute("resource"); String Type = typeAttribute.getValue().replaceAll(UniprotTaxonomySettings.TYPE_ADDRESS, ""); currentNode.setType(Type); } }); // rank saxReader.addHandler("/RDF/Description/rank", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element rankElement = (Element) path.getCurrent(); Attribute rankAttribute = rankElement.attribute("resource"); String Rank = 
rankAttribute.getValue().replaceAll(UniprotTaxonomySettings.RANK_ADDRESS, ""); currentNode.setRank(Rank); } }); // scientificName saxReader.addHandler("/RDF/Description/scientificName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element scientificnameElement = (Element) path.getCurrent(); String ScientificName = scientificnameElement.getText(); currentNode.setScientificName(ScientificName); } }); // otherName saxReader.addHandler("/RDF/Description/otherName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element othernameElement = (Element) path.getCurrent(); String tempname = othernameElement.getText(); currentNode.addOthernames(tempname); } }); // partOfLineage saxReader.addHandler("/RDF/Description/partOfLineage", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element lineageElement = (Element) path.getCurrent(); String temptext = lineageElement.getText(); boolean partOfLineage; if (temptext.equals("true")) { partOfLineage = true; } else partOfLineage = false; currentNode.setPartOfLineage(partOfLineage); } }); // Add parent saxReader.addHandler("/RDF/Description/subClassOf", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element subclassElement = (Element) path.getCurrent(); Attribute subclassAttribute = subclassElement.attribute("resource"); String subclassID = subclassAttribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, ""); TaxonomyNode parent = tree.getTaxonomyNode(subclassID); if (parent != null) { currentNode.addParentOnly(parent); } else { parent = new TaxonomyNode(); parent.setIdentifier(subclassID); tree.addtoNodelist(parent); currentNode.addParentOnly(parent); } } }); BufferedInputStream bis = new BufferedInputStream(is); Document doc; try { doc = 
saxReader.read(bis); } catch (DocumentException e) { throw new IOException(e.getMessage()); } tree.searchRoot(); return tree; }
From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java
License:Apache License
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl) throws KettleException { this.prevRow = buildEmptyRow(); // pre-allocate previous row try {// www. j a va 2 s .c om SAXReader reader = XMLParserFactoryProducer.getSAXReader(null); data.stopPruning = false; // Validate XML against specified schema? if (meta.isValidating()) { reader.setValidation(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); } else { // Ignore DTD declarations reader.setEntityResolver(new IgnoreDTDEntityResolver()); } // Ignore comments? if (meta.isIgnoreComments()) { reader.setIgnoreComments(true); } if (data.prunePath != null) { // when pruning is on: reader.read() below will wait until all is processed in the handler if (log.isDetailed()) { logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated")); } if (data.PathValue.equals(data.prunePath)) { // Edge case, but if true, there will only ever be one item in the list data.an = new ArrayList<>(1); // pre-allocate array and sizes data.an.add(null); } reader.addHandler(data.prunePath, new ElementHandler() { public void onStart(ElementPath path) { // do nothing here... 
} public void onEnd(ElementPath path) { if (isStopped()) { // when a large file is processed and it should be stopped it is still reading the hole thing // the only solution I see is to prune / detach the document and this will lead into a // NPE or other errors depending on the parsing location - this will be treated in the catch part below // any better idea is welcome if (log.isBasic()) { logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped")); } data.stopPruning = true; path.getCurrent().getDocument().detach(); // trick to stop reader return; } // process a ROW element if (log.isDebug()) { logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing")); } Element row = path.getCurrent(); try { // Pass over the row instead of just the document. If // if there's only one row, there's no need to // go back to the whole document. processStreaming(row); } catch (Exception e) { // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem throw new RuntimeException(e); } // prune the tree row.detach(); if (log.isDebug()) { logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing")); } } }); } if (IsInXMLField) { // read string to parse data.document = reader.read(new StringReader(StringXML)); } else if (readurl && KettleVFS.startsWithScheme(StringXML)) { data.document = reader.read(KettleVFS.getInputStream(StringXML)); } else if (readurl) { // read url as source HttpClient client = HttpClientManager.getInstance().createDefaultClient(); HttpGet method = new HttpGet(StringXML); method.addHeader("Accept-Encoding", "gzip"); HttpResponse response = client.execute(method); Header contentEncoding = response.getFirstHeader("Content-Encoding"); HttpEntity responseEntity = response.getEntity(); if (responseEntity != null) { if (contentEncoding != null) { String acceptEncodingValue = contentEncoding.getValue(); if (acceptEncodingValue.contains("gzip")) { GZIPInputStream in = 
new GZIPInputStream(responseEntity.getContent()); data.document = reader.read(in); } } else { data.document = reader.read(responseEntity.getContent()); } } } else { // get encoding. By default UTF-8 String encoding = "UTF-8"; if (!Utils.isEmpty(meta.getEncoding())) { encoding = meta.getEncoding(); } InputStream is = KettleVFS.getInputStream(file); try { data.document = reader.read(is, encoding); } finally { BaseStep.closeQuietly(is); } } if (meta.isNamespaceAware()) { prepareNSMap(data.document.getRootElement()); } } catch (Exception e) { if (data.stopPruning) { // ignore error when pruning return false; } else { throw new KettleException(e); } } return true; }
From source file:org.snipsnap.snip.XMLSnipImport.java
License:Open Source License
/** * Load snips and users into the SnipSpace from an xml document out of a stream. * * @param in the input stream to load from * @param flags whether or not to overwrite existing content *///from www.j a v a 2 s.c o m public static void load(InputStream in, final int flags) throws IOException { SAXReader saxReader = new SAXReader(); try { saxReader.addHandler("/snipspace/user", new ElementHandler() { public void onStart(ElementPath elementPath) { // nothing to do here ... } public void onEnd(ElementPath elementPath) { Element userElement = elementPath.getCurrent(); if ((flags & IMPORT_USERS) != 0) { try { XMLSnipImport.loadUser(elementPath.getCurrent(), flags); } catch (Exception e) { Logger.fatal("XMLSnipImport: error importing user: " + userElement.elementText("name")); } getStatus().inc(); } // prune the element to save memory userElement.detach(); } }); saxReader.addHandler("/snipspace/snip", new ElementHandler() { public void onStart(ElementPath elementPath) { // nothing to do here ... 
} public void onEnd(ElementPath elementPath) { Element snipElement = elementPath.getCurrent(); if ((flags & IMPORT_SNIPS) != 0) { try { XMLSnipImport.loadSnip(snipElement, flags); } catch (Exception e) { Logger.fatal("XMLSnipImport: error importing snip: " + snipElement.elementText("name")); } getStatus().inc(); } // prune the element to save memory snipElement.detach(); } }); // add a reader wrapper to remove illegal characters from input stream // it looks like the database export (XMLWriter) allows these to get through InputStreamReader reader = new InputStreamReader(in, "UTF-8") { public int read(char[] chars) throws IOException { int n = super.read(chars); for (int i = 0; i < n; i++) { chars[i] = replaceIfIllegal(chars[i]); } return n; } public int read(char[] chars, int start, int length) throws IOException { int n = super.read(chars, start, length); for (int i = 0; i < n; i++) { chars[i] = replaceIfIllegal(chars[i]); } return n; } private char replaceIfIllegal(char c) { if (c < 0x20 && !(c == 0x09 || c == 0x0a || c == 0x0d)) { charErrCount++; return (char) 0x20; } return c; } }; saxReader.read(reader); Logger.warn("XMLSnipImport: corrected " + charErrCount + " characters in input"); Logger.log("XMLSnipImport: imported " + getStatus().getValue() + " data records"); } catch (DocumentException e) { Logger.warn("XMLSnipImport: unable to parse document", e); throw new IOException("Error parsing document: " + e); } }
From source file:uidserver.Config.java
public Config(String configFilePath) { File configFile = new File(configFilePath); if (configFile.exists()) { try {//from ww w . ja v a2 s .c o m SAXReader reader = new SAXReader(); reader.addHandler("/config", new ElementHandler() { @Override public void onStart(ElementPath elementPath) { } @Override public void onEnd(ElementPath elementPath) { Element row = elementPath.getCurrent(); readElement(row); row.detach(); } private void readElement(Element row) { List<Element> nodes = row.elements(); if (!nodes.isEmpty()) { for (Element node : nodes) { String name = node.getName().toLowerCase(); String value = node.getText(); switch (name) { case "logpath": logPath = new File(value); break; case "port": port = value; break; case "timeout": timeOut = Integer.valueOf(value); break; case "uidfile": uidFile = new File(value); break; } } } else { System.out.println("Error: empty elements in config file, please add correct setup"); System.exit(0); } } }); reader.setValidation(false); Document document = reader.read(configFile); if (logPath != null && port != null && uidFile != null) { if (!logPath.exists()) { if (!logPath.mkdirs()) { System.out.println("Failed to create log file: " + logPath.getAbsoluteFile()); System.out.println("Please setup correct log file path"); System.exit(0); } } } else { System.out.println("Please set up correct Port/LogFile/UidFile"); System.exit(0); } } catch (DocumentException ex) { Logger.getLogger(Config.class.getName()).log(Level.SEVERE, null, ex); System.out.println("Error during reading xml config file, please double check file content"); System.exit(0); } } else { System.out.println("The specified config file: " + configFile.getAbsolutePath() + " doesn't exist"); System.out.println("Please key in correct config file path"); System.exit(0); } }