Example usage for org.dom4j ElementHandler ElementHandler

Introduction

This page collects example usages of the org.dom4j ElementHandler interface.

Prototype

ElementHandler

Source Link

Usage

From source file:com.globalsight.terminology.util.MtfAnalyzer.java

License:Apache License

/**
 * Parses the document at the given location and counts its
 * {@code /mtf/conceptGrp} elements, logging progress every 200 entries and
 * the final total once parsing has finished.
 *
 * @param p_url
 *            location of the document, passed unchanged to the SAX reader
 * @throws Exception
 *             propagated from the underlying reader
 */
public void analyze(String p_url) throws Exception {
    m_entryCount = 0;

    SAXReader saxReader = new SAXReader();
    saxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    System.err.println("Analyzing document: " + p_url);

    // Handle each entry as soon as it completes so that finished entries
    // can be dropped from the tree instead of accumulating in memory.
    ElementHandler entryCounter = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_entryCount++;

            boolean progressDue = (m_entryCount % 200 == 0);
            if (progressDue) {
                log("Entry " + m_entryCount);
            }
        }

        public void onEnd(ElementPath path) {
            // Unhook the finished entry from its parent.
            path.getCurrent().detach();
        }
    };
    saxReader.addHandler("/mtf/conceptGrp", entryCounter);

    saxReader.read(p_url);

    log("Total entries: " + m_entryCount);
}

From source file:com.globalsight.terminology.util.MtfSplitter.java

License:Apache License

/**
 * Splits the document at the given location into several output files, each
 * receiving at most {@code p_numEntries} {@code /mtf/conceptGrp} entries.
 *
 * <p>An output file is opened with {@code startFile} before parsing starts.
 * Whenever the current file already holds the maximum number of entries and
 * another entry begins, the file is closed with {@code closeFile} and a new
 * one is started. Every completed entry is written with {@code writeEntry}.
 *
 * @param p_url
 *            location of the document, passed unchanged to the SAX reader
 * @param p_numEntries
 *            maximum number of entries per output file, as a decimal string;
 *            must parse to a value of at least 1
 * @throws NumberFormatException
 *             if {@code p_numEntries} is not a valid integer
 * @throws IllegalArgumentException
 *             if {@code p_numEntries} is smaller than 1
 * @throws Exception
 *             propagated from file handling or from the underlying reader
 */
public void split(String p_url, String p_numEntries) throws Exception {
    final int maxEntries = Integer.parseInt(p_numEntries);
    if (maxEntries < 1) {
        // Previously a value of 0 failed with a divide-by-zero deep inside
        // the parse; fail early with a clear message instead.
        throw new IllegalArgumentException(
                "Number of entries per file must be at least 1: " + p_numEntries);
    }
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    m_entryCount = 0;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    log("Splitting document `" + p_url + "'");

    startFile(baseName, extension);

    // enable element complete notifications to conserve memory
    reader.addHandler("/mtf/conceptGrp", new ElementHandler() {
        public void onStart(ElementPath path) {
            ++m_entryCount;

            // Roll over only when the current file is already full, i.e.
            // when entry maxEntries + 1, 2 * maxEntries + 1, ... begins.
            // Testing "m_entryCount % maxEntries == 0" here rolled over one
            // entry too early, leaving the first file one entry short.
            if (m_entryCount > 1 && (m_entryCount - 1) % maxEntries == 0) {
                try {
                    closeFile();
                    startFile(baseName, extension);
                } catch (Exception ex) {
                    log(ex.toString());
                    System.exit(1);
                }
            }
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();

            writeEntry(element.asXML());

            // prune the current element to reduce memory
            element.detach();
        }
    });

    reader.read(p_url);

    closeFile();
}

From source file:com.globalsight.webservices.Ambassador.java

License:Apache License

/**
 * Updates a tu in database./*from   w w  w .j av  a  2 s  .c om*/
 * 
 * @param accessToken
 *            To judge caller has logon or not, can not be null. you can get
 *            it by calling method <code>login(username, password)</code>.
 * @param tmName
 *            TM name, will used to get tm id.
 * @param companyName
 *            company name, will used to get tm id.
 * @param tmx
 *            A tmx formate string inlcluding all tu information.
 * @return "true" if succeed
 * @throws WebServiceException
 */
public String editTu(String accessToken, String tmName, String companyName, String tmx)
        throws WebServiceException {
    try {
        Assert.assertNotEmpty(accessToken, "access token");
        Assert.assertNotEmpty(tmx, "tmx format");
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
        throw new WebServiceException(e.getMessage());
    }

    checkAccess(accessToken, "editEntry");
    checkPermission(accessToken, Permission.TM_EDIT_ENTRY);

    Company company = getCompanyByName(companyName);
    if (company == null) {
        throw new WebServiceException("Can not find the company with name (" + companyName + ")");
    }
    final ProjectTM ptm = getProjectTm(tmName, company.getIdAsLong());
    if (ptm == null) {
        throw new WebServiceException(
                "Can not find the tm with tm name (" + tmName + ") and company name (" + companyName + ")");
    }

    SAXReader reader = new SAXReader();
    ElementHandler handler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            element.detach();

            try {
                normalizeTu(element);
                validateTu(element);
                if (ptm.getTm3Id() == null) {
                    editTm2Tu(element);
                } else {
                    editTm3Tu(element, ptm);
                }
            } catch (Throwable ex) {
                logger.error(ex.getMessage(), ex);
                throw new ThreadDeath();
            }
        }
    };
    reader.addHandler("/tu", handler);

    WebServicesLog.Start activityStart = null;
    try {
        String loggedUserName = this.getUsernameFromSession(accessToken);
        Map<Object, Object> activityArgs = new HashMap<Object, Object>();
        activityArgs.put("loggedUserName", loggedUserName);
        activityStart = WebServicesLog.start(Ambassador.class, "editTu(accessToken,tmx)", activityArgs);
        reader.read(new StringReader(tmx));
    } catch (DocumentException e) {
        logger.error(e.getMessage(), e);
        throw new WebServiceException(e.getMessage());
    } finally {
        if (activityStart != null) {
            activityStart.end();
        }
    }

    return "true";
}

From source file:com.nokia.config.SAXConfigParser.java

License:Open Source License

/**
 * Constructor/*from www.  jav a 2 s  .  c o m*/
 * @return list of available configurations that can be built.
 */
public String getConfigs() {
    File file = new File(sysdefFile);
    SAXReader reader = new SAXReader();
    reader.addHandler("/SystemDefinition/build/target", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element row = path.getCurrent();
            Iterator itr = row.attributeIterator();
            while (itr.hasNext()) {
                Attribute child = (Attribute) itr.next();
                String attrName = child.getQualifiedName();
                if (attrName.equals("name")) {
                    configs += (String) child.getValue() + ",";
                }
            }
            row.detach();
        }
    });
    try {
        Document doc = reader.read(file);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return configs;
}

From source file:com.nokia.helium.sbs.SAXSysdefParser.java

License:Open Source License

/**
 * Resets {@code layers} to an empty list and fills it with the {@code name}
 * attribute values of every {@code /SystemDefinition/systemModel/<nodeToGet>}
 * element found in the system definition file.
 *
 * <p>A {@code DocumentException} raised while reading is printed to standard
 * error and otherwise ignored.
 *
 * @param nodeToGet
 *            path segment appended to {@code /SystemDefinition/systemModel/}
 *            to select the elements of interest
 */
public void parseConfig(String nodeToGet) {
    layers = new ArrayList<String>();

    String handlerPath = "/SystemDefinition/systemModel/" + nodeToGet;
    ElementHandler nameCollector = new ElementHandler() {
        public void onStart(ElementPath path) {
            // nothing to do when the element opens
        }

        public void onEnd(ElementPath path) {
            Element node = path.getCurrent();
            for (Iterator attrs = node.attributeIterator(); attrs.hasNext();) {
                Attribute attr = (Attribute) attrs.next();
                if (attr.getQualifiedName().equals("name")) {
                    layers.add(attr.getValue());
                }
            }
            // the element has been consumed; drop it from the tree
            node.detach();
        }
    };

    SAXReader saxReader = new SAXReader();
    saxReader.addHandler(handlerPath, nameCollector);
    try {
        saxReader.read(sysdefFile);
    } catch (DocumentException e) {
        e.printStackTrace();
    }
}

From source file:com.panet.imeta.trans.steps.getxmldata.GetXMLData.java

License:Open Source License

/**
 * Reads an XML document into {@code data.document} from a string, a URL or a
 * file, depending on the flags.
 *
 * <p>When {@code data.prunePath} is set, a handler is registered on that path
 * which processes each matching element through {@code processStreaming} as
 * soon as it completes and then detaches it from the tree.
 *
 * @param StringXML
 *            the XML text when {@code IsInXMLField} is true, or the URL to
 *            read when {@code readurl} is true
 * @param file
 *            the file to read when neither flag is set
 * @param IsInXMLField
 *            true to parse {@code StringXML} itself
 * @param readurl
 *            true to treat {@code StringXML} as a URL
 * @return true on success; false when an exception occurred after streaming
 *         was stopped ({@code data.stopPruning} is set)
 * @throws KettleException
 *             wrapping any other exception raised while reading
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {

    try {
        SAXReader reader = new SAXReader();
        data.stopPruning = false;

        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        }

        // Ignore comments?
        if (meta.isIgnoreComments())
            reader.setIgnoreComments(true);

        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all
            // is processed in the handler
            if (log.isDetailed())
                logDetailed(Messages.getString("GetXMLData.Log.StreamingMode.Activated"));
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }

                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // when a large file is processed and it should be
                        // stopped it is still reading the whole thing
                        // the only solution I see is to prune / detach the
                        // document and this will lead into a
                        // NPE or other errors depending on the parsing
                        // location - this will be treated in the catch part
                        // below
                        // any better idea is welcome
                        if (log.isBasic())
                            logBasic(Messages.getString("GetXMLData.Log.StreamingMode.Stopped"));
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }

                    // process a ROW element
                    if (log.isDebug())
                        logDebug(Messages.getString("GetXMLData.Log.StreamingMode.StartProcessing"));
                    Element row = path.getCurrent();
                    try {
                        processStreaming(row.getDocument());
                    } catch (Exception e) {
                        // catch the KettleException or others and forward
                        // to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug())
                        logDebug(Messages.getString("GetXMLData.Log.StreamingMode.EndProcessing"));
                }
            });
        }

        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl) {
            // read url as source
            data.document = reader.read(new URL(StringXML));
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Const.isEmpty(meta.getEncoding()))
                encoding = meta.getEncoding();
            // NOTE(review): dom4j declares this overload as
            // read(InputStream, String systemId), so the value passed here
            // may not act as an encoding - confirm before relying on it.
            java.io.InputStream in = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(in, encoding);
            } finally {
                // The stream was opened here, so release it here whether or
                // not parsing succeeded.
                if (in != null) {
                    try {
                        in.close();
                    } catch (java.io.IOException ignored) {
                        // best effort: a failed close must not hide the
                        // outcome of the parse
                    }
                }
            }
        }

        if (meta.isNamespaceAware())
            prepareNSMap(data.document.getRootElement());
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}

From source file:galign.helpers.tmx.TmxFile.java

License:Apache License

/**
 * Registers handlers on the given reader for {@code /tmx},
 * {@code /tmx/header} and {@code /tmx/body/tu}, then reads the input.
 *
 * <p>The handlers record the {@code version} attribute of the root element,
 * build the {@code TmxHeader} from the header element, and pass a {@code Tu}
 * for every tu element to {@code addTu}. Header and tu elements are detached
 * once handled.
 *
 * @param p_reader
 *            the reader to configure and read with
 * @param p_input
 *            the input to parse
 * @throws org.dom4j.DocumentException
 *             propagated from the reader
 */
protected void init(SAXReader p_reader, InputSource p_input) throws org.dom4j.DocumentException {
    // Root element: remember the declared TMX version as soon as it opens.
    ElementHandler rootHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_tmxVersion = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    };

    // Header: build the header object once the element is complete.
    ElementHandler headerHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element header = path.getCurrent();

            m_header = new TmxHeader(header);
            m_header.setTmxVersion(m_tmxVersion);

            // drop the handled element from the tree
            header.detach();
        }
    };

    // Translation units: collect each one, then drop it from the tree.
    ElementHandler tuHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element tu = path.getCurrent();

            addTu(new Tu(tu));

            tu.detach();
        }
    };

    p_reader.addHandler("/tmx", rootHandler);
    p_reader.addHandler("/tmx/header", headerHandler);
    p_reader.addHandler("/tmx/body/tu", tuHandler);

    p_reader.read(p_input);
}

From source file:musite.io.xml.PredictionResultXMLReader.java

License:Open Source License

/**
 * Reads a prediction result from the given stream.
 *
 * <p>Proteins are read by a {@code ProteinsXMLReader} configured with a
 * residue-annotation field reader. In addition, every
 * {@code model-list/model} element (below {@code root} when one is set) is
 * turned into a {@code PredictionModelImpl} and added to the result.
 *
 * @param is
 *            the stream to read; must not be null
 * @return {@code data} when it is set, otherwise a new
 *         {@code PredictionResultImpl}, populated from the stream
 * @throws IllegalArgumentException
 *             if {@code is} is null
 * @throws IOException
 *             propagated from the proteins reader
 */
public PredictionResult read(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException();
    }

    final PredictionResult result = (data != null) ? data : new PredictionResultImpl();

    ProteinsXMLReader proteinsReader = ProteinsXMLReader.createReader(result, false);
    proteinsReader.setRoot(root);

    ProteinResidueAnnotationReader annReader = new ProteinResidueAnnotationReader();
    annReader.putAnnotationFieldReader(musite.prediction.PredictionResult.ANNOTATION_FIELD_SCORE,
            SimpleFieldXMLReader.createDoubleCollectionReader());
    proteinsReader.putProteinFieldReader(musite.ResidueAnnotationUtil.RESIDUE_ANNOTATION, annReader);

    String modelPath = "/model-list/model";
    if (root != null) {
        modelPath = "/" + root + modelPath;
    }

    ElementHandler modelHandler = new ElementHandler() {
        public void onStart(ElementPath elementPath) {
        }

        public void onEnd(ElementPath elementPath) {
            Element modelElem = elementPath.getCurrent();
            String modelName = StringEscapeUtils.unescapeXml(modelElem.attributeValue("name"));

            // optional <ptm>
            String ptmText = StringEscapeUtils.unescapeXml(modelElem.elementText("ptm"));
            PTM ptm = (ptmText == null) ? null : PTM.valueOf(ptmText);

            // optional <amino-acids>: semicolon separated, line breaks ignored
            Set<AminoAcid> aminoAcids = null;
            String aaText = StringEscapeUtils.unescapeXml(modelElem.elementText("amino-acids"));
            if (aaText != null) {
                String[] tokens = aaText.replace("\n", "").split(";");
                aminoAcids = new HashSet<AminoAcid>(tokens.length);
                for (String token : tokens) {
                    aminoAcids.add(AminoAcid.valueOf(token.trim()));
                }
            }

            // optional <spec-estimate-data>: semicolon separated doubles
            SpecificityEstimatorImpl estimator = null;
            String estimateText = StringEscapeUtils.unescapeXml(modelElem.elementText("spec-estimate-data"));
            if (estimateText != null) {
                List<Double> training = new ArrayList<Double>();
                for (String token : estimateText.replace("\n", "").split(";")) {
                    training.add(Double.valueOf(token.trim()));
                }
                estimator = new SpecificityEstimatorImpl(training);
            }

            // optional <comment>: "%EOL%" markers stand for line breaks
            String comment = StringEscapeUtils.unescapeXml(modelElem.elementText("comment"));
            if (comment != null) {
                comment = comment.replace("%EOL%", "\n");
            }

            PredictionModelImpl.Builder builder = new PredictionModelImpl.Builder();
            PredictionModelImpl model = builder.name(modelName).ptm(ptm).aminoAcids(aminoAcids)
                    .specEstimator(estimator).comment(comment).build();

            result.addModel(model);

            // the element has been consumed; drop it from the tree
            modelElem.detach();
        }
    };
    proteinsReader.addSaxReaderHandler(modelPath, modelHandler);

    proteinsReader.read(is);

    return result;
}

From source file:musite.io.xml.ProteinsXMLReader.java

License:Open Source License

/**
 * Creates a reader that adds the proteins it parses to the given container.
 *
 * <p>A handler is registered for {@code protein-list/protein} elements
 * (below {@code root} when it is non-null at construction time). For each
 * such element it builds a {@code ProteinImpl} from the child elements that
 * pass the field filter, using a registered field reader where one exists and
 * the unescaped trimmed text otherwise. The protein is added when it passes
 * {@code proteinFilter}, and the element is then detached.
 *
 * @param proteins
 *            the container that receives the parsed proteins
 */
public ProteinsXMLReader(Proteins proteins) {
    this.data = proteins;
    nullData = proteins == null;
    proteinFieldReaders = new HashMap();
    fieldFilter = null;
    saxReaderHandler = new HashMap();
    String path = "/protein-list/protein";
    if (root != null)
        path = "/" + root + path;
    addSaxReaderHandler(path, new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            ProteinImpl protein = new ProteinImpl();

            Element elem = path.getCurrent();
            Iterator<Element> itr = elem.elementIterator();
            while (itr.hasNext()) {
                Element field = (Element) itr.next();
                String name = field.getQualifiedName();
                if (fieldFilter != null && fieldFilterInclude != fieldFilter.contains(name))
                    continue;

                Object obj;

                Reader fieldReader = proteinFieldReaders.get(name);
                if (fieldReader != null) {
                    try {
                        String text = nodeContentToString(field);
                        InputStream bais = StringUtil.toStream(text);
                        try {
                            obj = fieldReader.read(bais);
                        } finally {
                            // Close the stream even when the field reader
                            // fails; it was previously left open then.
                            bais.close();
                        }
                    } catch (IOException e) {
                        // A field that cannot be read is reported and skipped.
                        e.printStackTrace();
                        continue;
                    }
                } else {
                    obj = StringEscapeUtils.unescapeXml(field.getTextTrim());
                }

                protein.putInfo(name, obj);
            }

            if (proteinFilter == null || proteinFilter.filter(protein))
                data.addProtein(protein);

            // progress output every 1000 proteins
            int count = data.proteinCount();
            if (count % 1000 == 0)
                System.out.println(count);

            // prune the tree
            elem.detach();
        }
    });
}

From source file:musite.io.xml.UniProtXMLReader.java

License:Open Source License

/**
 * Reads proteins from a UniProt XML stream.
 *
 * <p>Handlers on the children of {@code /uniprot/entry} collect the first
 * accession, name, full name, scientific organism name, sequence and the
 * accepted modification sites of the current entry. When the entry ends, a
 * {@code ProteinImpl} is created and annotated if organism, accession and
 * sequence are all present and the organism passes {@code organismFilter}.
 * The entry is then detached from the tree.
 *
 * @param is
 *            the stream to read; must not be null
 * @return {@code data} when it is set, otherwise a new {@code ProteinsImpl},
 *         populated from the stream
 * @throws IllegalArgumentException
 *             if {@code is} is null
 * @throws IOException
 *             if the document cannot be parsed; the underlying
 *             {@code DocumentException} is attached as the cause
 */
public Proteins read(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException();
    }

    final Proteins result = data == null ? new ProteinsImpl() : data;

    SAXReader saxReader = new SAXReader();

    // Per-entry scratch state, reset whenever a new entry starts.
    final StringBuilder acc = new StringBuilder(30);
    final StringBuilder name = new StringBuilder(30);
    final StringBuilder fullName = new StringBuilder(200);
    final StringBuilder org = new StringBuilder(30);
    final StringBuilder seq = new StringBuilder(2000);
    final List<List> sites = new ArrayList(4); // location, ptm, enzyme, annotation
    final Set<String> accs = new HashSet();

    // entry
    saxReader.addHandler("/uniprot/entry", new ElementHandler() {
        public void onStart(ElementPath path) {
            acc.setLength(0);
            fullName.setLength(0);
            seq.setLength(0);
            org.setLength(0);
            name.setLength(0);
            sites.clear();
            accs.clear();
        }

        public void onEnd(ElementPath path) {
            // process an element
            if (org.length() > 0 && (organismFilter == null || organismFilter.contains(org.toString()))
                    && acc.length() > 0 && seq.length() > 0) {
                String accession = acc.toString();
                String sequence = seq.toString();

                // org is known to be non-empty here, so it is passed as is
                ProteinImpl protein = new ProteinImpl(accession, sequence,
                        name.length() == 0 ? null : name.toString(),
                        fullName.length() == 0 ? null : fullName.toString(),
                        org.toString());
                result.addProtein(protein);

                for (List l : sites) {
                    Integer site = (Integer) l.get(0);
                    PTM ptm = (PTM) l.get(1);
                    String enzyme = (String) l.get(2);
                    if (enzyme != null && enzyme.equalsIgnoreCase("autocatalysis")) {
                        // an autocatalytic site is attributed to the entry itself
                        enzyme = name.toString();
                    }

                    Map ann = (Map) l.get(3);
                    try {
                        PTMAnnotationUtil.annotate(protein, site, ptm, enzyme, ann);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }

                if (keepAllIds) {
                    for (String ac : accs) {
                        mapIdMainId.put(ac, accession);
                    }
                    if (!accs.isEmpty())
                        protein.putInfo("other-accessions", new HashSet(accs));
                }
            }

            // prune the tree
            Element row = path.getCurrent();
            row.detach();
        }
    });

    // accession: the first one is the main accession, later ones are kept
    // as alternatives only when keepAllIds is set
    saxReader.addHandler("/uniprot/entry/accession", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (acc.length() == 0) {
                Element el = path.getCurrent();
                acc.append(el.getText());
            } else {
                if (keepAllIds) {
                    accs.add(path.getCurrent().getText());
                }
            }

        }
    });

    // name (first occurrence wins)
    saxReader.addHandler("/uniprot/entry/name", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (name.length() > 0)
                return;

            Element el = path.getCurrent();
            name.append(el.getText());
        }
    });

    // full name (first occurrence wins)
    saxReader.addHandler("/uniprot/entry/protein/recommendedName/fullName", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (fullName.length() > 0)
                return;

            Element el = path.getCurrent();
            fullName.append(el.getTextTrim());
        }
    });

    // organism: only the first name of type "scientific" is used
    saxReader.addHandler("/uniprot/entry/organism/name", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (org.length() > 0)
                return;

            Element el = path.getCurrent();
            String attr = el.attributeValue("type");
            if (attr == null || !attr.equalsIgnoreCase("scientific")) {
                return;
            }

            org.append(el.getText());
        }
    });

    // sequence (first occurrence wins, all whitespace removed)
    saxReader.addHandler("/uniprot/entry/sequence", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (seq.length() > 0)
                return;

            Element el = path.getCurrent();
            seq.append(el.getText().replaceAll("\\p{Space}", ""));
        }
    });

    // feature: collect modification sites that pass the configured filters
    saxReader.addHandler("/uniprot/entry/feature", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            Element el = path.getCurrent();
            String type = el.attributeValue("type");
            if (type == null)
                return;

            PTM ptm = null;
            String enzyme = null;
            String description = null;
            String keyword = null;

            if (UNIPROT_TYPES.contains(type.toLowerCase())) {
                description = el.attributeValue("description");
                if (description == null)
                    return;

                // The description is a "; "-separated list: a part that is
                // a PTM keyword selects the PTM, a part starting with "by "
                // names the enzyme.
                String[] descs = description.split("; ");
                for (String desc : descs) {
                    PTM tmp = PTM.ofKeyword(desc);
                    if (tmp != null) {
                        ptm = tmp;
                        keyword = desc;
                    } else if (desc.startsWith("by ")) {
                        enzyme = desc.substring(3);
                    }
                }
            }

            if (ptm == null || (ptmFilter != null && !ptmFilter.contains(ptm)))
                return;

            String status = el.attributeValue("status");
            if (status != null) {
                if (!includeBySimilarity && status.equalsIgnoreCase("By similarity"))
                    return;
                if (!includeProbable && status.equalsIgnoreCase("Probable"))
                    return;
                if (!includePotential && status.equalsIgnoreCase("Potential"))
                    return;
            }

            int site = -1;

            // The last parsable position found under any location is used.
            List<Element> locs = el.elements("location");
            for (Element loc : locs) {
                List<Element> poss = loc.elements("position");
                for (Element pos : poss) {
                    String str = pos.attributeValue("position");
                    if (str == null)
                        continue;

                    try {
                        site = Integer.parseInt(str) - 1; //start from 0
                    } catch (NumberFormatException e) {
                        continue;
                    }
                }
            }

            if (site != -1) {
                List l = new ArrayList();
                l.add(site);
                l.add(ptm);
                l.add(enzyme);
                Map<String, Object> m = new HashMap();
                if (keyword != null)
                    m.put("keyword", keyword);
                if (description != null)
                    m.put("description", description);
                if (status != null)
                    m.put("status", status);
                l.add(m);
                sites.add(l);
            }
        }
    });

    BufferedInputStream bis = new BufferedInputStream(is);

    try {
        saxReader.read(bis);
    } catch (DocumentException e) {
        // Keep the parser failure as the cause so that the original stack
        // trace is not lost; initCause is used instead of the two-argument
        // constructor to stay compatible with older Java versions.
        IOException ioe = new IOException(e.getMessage());
        ioe.initCause(e);
        throw ioe;
    }

    return result;
}