Example usage for org.dom4j ElementHandler ElementHandler

List of usage examples for org.dom4j ElementHandler ElementHandler

Introduction

On this page you can find example usages of org.dom4j ElementHandler ElementHandler.

Prototype

ElementHandler

Source Link

Usage

From source file:com.globalsight.terminology.util.MtfAnalyzer.java

License:Apache License

/**
 * Parses the document at the given URL and counts its
 * <code>/mtf/conceptGrp</code> elements in <code>m_entryCount</code>.
 *
 * A progress line is logged every 200 entries and the total is logged once
 * reading has finished. Every entry is detached from the tree as soon as its
 * end tag has been read.
 *
 * @param p_url location of the document, handed to the SAX reader as-is
 * @throws Exception if the reader cannot be set up or the document cannot
 *             be read
 */
public void analyze(String p_url) throws Exception {
    m_entryCount = 0;

    SAXReader saxReader = new SAXReader();
    saxReader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    System.err.println("Analyzing document: " + p_url);

    // Counts entries when they open and throws them away when they close.
    ElementHandler entryCounter = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_entryCount++;

            boolean atProgressMark = (m_entryCount % 200 == 0);
            if (atProgressMark) {
                log("Entry " + m_entryCount);
            }
        }

        public void onEnd(ElementPath path) {
            // prune the finished entry to reduce memory
            path.getCurrent().detach();
        }
    };
    saxReader.addHandler("/mtf/conceptGrp", entryCounter);

    // The returned document is not needed; only the handler side effects are.
    saxReader.read(p_url);

    log("Total entries: " + m_entryCount);
}

From source file:com.globalsight.terminology.util.MtfSplitter.java

License:Apache License

/**
 * Splits the document at the given URL into several output files, each
 * holding up to <code>p_numEntries</code> <code>/mtf/conceptGrp</code>
 * entries.
 *
 * Output files are opened with <code>startFile(baseName, extension)</code>,
 * where both values are derived from <code>p_url</code>, and entries are
 * written with <code>writeEntry</code>. A new file is started before entry
 * number <code>k * p_numEntries + 1</code>, so every file (including the
 * first) receives a full batch. If rolling over to the next file fails, the
 * error is logged and the JVM exits with status 1.
 *
 * @param p_url location of the document to split
 * @param p_numEntries maximum number of entries per output file, as a
 *            decimal string; must be at least 1
 * @throws NumberFormatException if <code>p_numEntries</code> is not an
 *             integer
 * @throws IllegalArgumentException if <code>p_numEntries</code> is less
 *             than 1
 * @throws Exception if the document cannot be read or a file cannot be
 *             opened or closed
 */
public void split(String p_url, String p_numEntries) throws Exception {
    final int maxEntries = Integer.parseInt(p_numEntries);
    if (maxEntries < 1) {
        // A zero batch size would otherwise surface as a division by zero
        // from inside the parser callback.
        throw new IllegalArgumentException("Number of entries per file must be at least 1: " + p_numEntries);
    }
    final String baseName = getBaseName(p_url);
    final String extension = getExtension(p_url);

    m_entryCount = 0;

    SAXReader reader = new SAXReader();
    reader.setXMLReaderClassName("org.apache.xerces.parsers.SAXParser");

    log("Splitting document `" + p_url + "'");

    startFile(baseName, extension);

    try {
        // enable element complete notifications to conserve memory
        reader.addHandler("/mtf/conceptGrp", new ElementHandler() {
            public void onStart(ElementPath path) {
                ++m_entryCount;

                // Roll over only once the current file already holds a full
                // batch, i.e. when this entry is the first one of the next
                // batch. Checking "m_entryCount % maxEntries == 0" here would
                // switch files one entry too early and leave the first file
                // short.
                if (m_entryCount > 1 && (m_entryCount - 1) % maxEntries == 0) {
                    try {
                        closeFile();
                        startFile(baseName, extension);
                    } catch (Exception ex) {
                        log(ex.toString());
                        System.exit(1);
                    }
                }
            }

            public void onEnd(ElementPath path) {
                Element element = path.getCurrent();

                writeEntry(element.asXML());

                // prune the current element to reduce memory
                element.detach();
            }
        });

        reader.read(p_url);
    } finally {
        // Close the current output file even when parsing fails part-way.
        closeFile();
    }
}

From source file:com.globalsight.webservices.Ambassador.java

License:Apache License

/**
 * Updates a tu in database./*from   w w  w .j av  a  2 s  .c om*/
 * 
 * @param accessToken
 *            To judge caller has logon or not, can not be null. you can get
 *            it by calling method <code>login(username, password)</code>.
 * @param tmName
 *            TM name, will used to get tm id.
 * @param companyName
 *            company name, will used to get tm id.
 * @param tmx
 *            A tmx formate string inlcluding all tu information.
 * @return "true" if succeed
 * @throws WebServiceException
 */
public String editTu(String accessToken, String tmName, String companyName, String tmx)
        throws WebServiceException {
    try {
        Assert.assertNotEmpty(accessToken, "access token");
        Assert.assertNotEmpty(tmx, "tmx format");
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
        throw new WebServiceException(e.getMessage());
    }

    checkAccess(accessToken, "editEntry");
    checkPermission(accessToken, Permission.TM_EDIT_ENTRY);

    Company company = getCompanyByName(companyName);
    if (company == null) {
        throw new WebServiceException("Can not find the company with name (" + companyName + ")");
    }
    final ProjectTM ptm = getProjectTm(tmName, company.getIdAsLong());
    if (ptm == null) {
        throw new WebServiceException(
                "Can not find the tm with tm name (" + tmName + ") and company name (" + companyName + ")");
    }

    SAXReader reader = new SAXReader();
    ElementHandler handler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element element = path.getCurrent();
            element.detach();

            try {
                normalizeTu(element);
                validateTu(element);
                if (ptm.getTm3Id() == null) {
                    editTm2Tu(element);
                } else {
                    editTm3Tu(element, ptm);
                }
            } catch (Throwable ex) {
                logger.error(ex.getMessage(), ex);
                throw new ThreadDeath();
            }
        }
    };
    reader.addHandler("/tu", handler);

    WebServicesLog.Start activityStart = null;
    try {
        String loggedUserName = this.getUsernameFromSession(accessToken);
        Map<Object, Object> activityArgs = new HashMap<Object, Object>();
        activityArgs.put("loggedUserName", loggedUserName);
        activityStart = WebServicesLog.start(Ambassador.class, "editTu(accessToken,tmx)", activityArgs);
        reader.read(new StringReader(tmx));
    } catch (DocumentException e) {
        logger.error(e.getMessage(), e);
        throw new WebServiceException(e.getMessage());
    } finally {
        if (activityStart != null) {
            activityStart.end();
        }
    }

    return "true";
}

From source file:com.nokia.config.SAXConfigParser.java

License:Open Source License

/**
 * Constructor/*from www.  jav a 2 s  .  c o m*/
 * @return list of available configurations that can be built.
 */
public String getConfigs() {
    File file = new File(sysdefFile);
    SAXReader reader = new SAXReader();
    reader.addHandler("/SystemDefinition/build/target", new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element row = path.getCurrent();
            Iterator itr = row.attributeIterator();
            while (itr.hasNext()) {
                Attribute child = (Attribute) itr.next();
                String attrName = child.getQualifiedName();
                if (attrName.equals("name")) {
                    configs += (String) child.getValue() + ",";
                }
            }
            row.detach();
        }
    });
    try {
        Document doc = reader.read(file);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return configs;
}

From source file:com.nokia.helium.sbs.SAXSysdefParser.java

License:Open Source License

/**
 * Reads the system definition file and stores, in the <code>layers</code>
 * field, the value of the <code>name</code> attribute of every
 * <code>/SystemDefinition/systemModel/&lt;nodeToGet&gt;</code> element.
 *
 * <code>layers</code> is replaced by a fresh list on every call. A
 * DocumentException while reading is printed to standard error and
 * otherwise ignored.
 *
 * @param nodeToGet path segment appended to
 *            <code>/SystemDefinition/systemModel/</code> to select the
 *            elements to collect
 */
public void parseConfig(String nodeToGet) {
    layers = new ArrayList<String>();

    ElementHandler nodeHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            // nothing to do until the element is complete
        }

        public void onEnd(ElementPath path) {
            Element node = path.getCurrent();
            for (Iterator attrs = node.attributeIterator(); attrs.hasNext();) {
                Attribute attr = (Attribute) attrs.next();
                if (attr.getQualifiedName().equals("name")) {
                    layers.add(attr.getValue());
                }
            }
            // the element is no longer needed once its name is recorded
            node.detach();
        }
    };

    SAXReader saxReader = new SAXReader();
    saxReader.addHandler("/SystemDefinition/systemModel/" + nodeToGet, nodeHandler);
    try {
        saxReader.read(sysdefFile);
    } catch (DocumentException e) {
        e.printStackTrace();
    }
}

From source file:com.panet.imeta.trans.steps.getxmldata.GetXMLData.java

License:Open Source License

/**
 * Parses one XML source and stores the resulting document in
 * <code>data.document</code>.
 *
 * The source is chosen by the flags: the XML text itself when
 * <code>IsInXMLField</code> is set, otherwise a URL when
 * <code>readurl</code> is set, otherwise the given file. When
 * <code>data.prunePath</code> is set, a handler on that path processes each
 * matching element through <code>processStreaming</code> while the document
 * is being read and detaches it afterwards.
 *
 * @param StringXML the XML text, or the URL to read when
 *            <code>readurl</code> is set
 * @param file the file to read when neither flag is set
 * @param IsInXMLField whether <code>StringXML</code> holds the XML itself
 * @param readurl whether <code>StringXML</code> holds a URL
 * @return <code>true</code> when the source was read; <code>false</code>
 *         when reading failed after streaming had been stopped on purpose
 * @throws KettleException on any other failure
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {

    try {
        SAXReader reader = new SAXReader();
        data.stopPruning = false;

        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        }

        // Ignore comments?
        if (meta.isIgnoreComments()) {
            reader.setIgnoreComments(true);
        }

        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all
            // is processed in the handler
            if (log.isDetailed()) {
                logDetailed(Messages.getString("GetXMLData.Log.StreamingMode.Activated"));
            }
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }

                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // When a large file is processed and the step is
                        // stopped, the reader would still read the whole
                        // thing. The only way out seen so far is to detach
                        // the document, which leads to an NPE or other
                        // errors depending on the parsing location; those
                        // are handled in the catch part below via
                        // data.stopPruning.
                        if (log.isBasic()) {
                            logBasic(Messages.getString("GetXMLData.Log.StreamingMode.Stopped"));
                        }
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }

                    // process a ROW element
                    if (log.isDebug()) {
                        logDebug(Messages.getString("GetXMLData.Log.StreamingMode.StartProcessing"));
                    }
                    Element row = path.getCurrent();
                    try {
                        processStreaming(row.getDocument());
                    } catch (Exception e) {
                        // catch the KettleException or others and forward
                        // to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug()) {
                        logDebug(Messages.getString("GetXMLData.Log.StreamingMode.EndProcessing"));
                    }
                }
            });
        }

        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl) {
            // read url as source
            data.document = reader.read(new URL(StringXML));
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Const.isEmpty(meta.getEncoding())) {
                encoding = meta.getEncoding();
            }
            // NOTE(review): in stock dom4j the second argument of
            // read(InputStream, String) is a system id, not an encoding -
            // confirm this call does what is intended.
            java.io.InputStream fileStream = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(fileStream, encoding);
            } finally {
                // Release the file handle whether or not parsing succeeded,
                // including when streaming was aborted on purpose.
                try {
                    fileStream.close();
                } catch (java.io.IOException ignored) {
                    // best effort: a failed close must not hide the parse result
                }
            }
        }

        if (meta.isNamespaceAware()) {
            prepareNSMap(data.document.getRootElement());
        }
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}

From source file:galign.helpers.tmx.TmxFile.java

License:Apache License

/**
 * Reads a TMX document from the given input with the given reader and
 * captures its version, header and translation units.
 *
 * Three handlers are registered on the reader before reading:
 * <code>/tmx</code> stores the <code>version</code> attribute in
 * <code>m_tmxVersion</code>, <code>/tmx/header</code> builds
 * <code>m_header</code>, and <code>/tmx/body/tu</code> passes each unit to
 * <code>addTu</code>. Header and tu elements are detached once handled.
 *
 * @param p_reader reader to register the handlers on and to read with
 * @param p_input source of the TMX document
 * @throws org.dom4j.DocumentException if the reader fails to read the input
 */
protected void init(SAXReader p_reader, InputSource p_input) throws org.dom4j.DocumentException {
    // Root element: remember the TMX version as soon as the start tag is seen.
    ElementHandler rootHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
            m_tmxVersion = path.getCurrent().attributeValue("version");
        }

        public void onEnd(ElementPath path) {
        }
    };

    // Header: build the header object once the element is complete.
    ElementHandler headerHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element headerElement = path.getCurrent();

            m_header = new TmxHeader(headerElement);
            m_header.setTmxVersion(m_tmxVersion);

            // prune the current element to reduce memory
            headerElement.detach();
        }
    };

    // Translation units: hand each complete unit to addTu.
    ElementHandler tuHandler = new ElementHandler() {
        public void onStart(ElementPath path) {
        }

        public void onEnd(ElementPath path) {
            Element tuElement = path.getCurrent();

            addTu(new Tu(tuElement));

            // prune the current element to reduce memory
            tuElement.detach();
        }
    };

    p_reader.addHandler("/tmx", rootHandler);
    p_reader.addHandler("/tmx/header", headerHandler);
    p_reader.addHandler("/tmx/body/tu", tuHandler);

    // The returned document is not used; the handlers did all the work.
    p_reader.read(p_input);
}

From source file:musite.io.xml.PredictionResultXMLReader.java

License:Open Source License

/**
 * Reads a prediction result from the given stream.
 *
 * Proteins are read by a ProteinsXMLReader that fills the result; in
 * addition a handler on <code>/model-list/model</code> (prefixed with
 * <code>/&lt;root&gt;</code> when <code>root</code> is set) turns each model
 * element into a prediction model and adds it to the result.
 *
 * @param is stream to read from; must not be null
 * @return the <code>data</code> field when it is set, otherwise a new
 *         result, filled with what was read
 * @throws IllegalArgumentException if <code>is</code> is null
 * @throws IOException if the underlying reader fails
 */
public PredictionResult read(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException();
    }

    final PredictionResult result;
    if (data == null) {
        result = new PredictionResultImpl();
    } else {
        result = data;
    }

    ProteinsXMLReader proteinsReader = ProteinsXMLReader.createReader(result, false);
    proteinsReader.setRoot(root);

    ProteinResidueAnnotationReader annReader = new ProteinResidueAnnotationReader();
    annReader.putAnnotationFieldReader(musite.prediction.PredictionResult.ANNOTATION_FIELD_SCORE,
            SimpleFieldXMLReader.createDoubleCollectionReader());
    proteinsReader.putProteinFieldReader(musite.ResidueAnnotationUtil.RESIDUE_ANNOTATION, annReader);

    String modelPath = "/model-list/model";
    if (root != null) {
        modelPath = "/" + root + modelPath;
    }

    ElementHandler modelHandler = new ElementHandler() {
        public void onStart(ElementPath elementPath) {
        }

        public void onEnd(ElementPath elementPath) {
            Element modelElem = elementPath.getCurrent();
            String modelName = StringEscapeUtils.unescapeXml(modelElem.attributeValue("name"));

            // optional <ptm> child
            String ptmText = StringEscapeUtils.unescapeXml(modelElem.elementText("ptm"));
            PTM ptm = (ptmText == null) ? null : PTM.valueOf(ptmText);

            // optional <amino-acids> child: ';'-separated names
            Set<AminoAcid> aminoAcids = null;
            String aminoAcidText = StringEscapeUtils.unescapeXml(modelElem.elementText("amino-acids"));
            if (aminoAcidText != null) {
                String[] tokens = aminoAcidText.replaceAll("\n", "").split(";");
                aminoAcids = new HashSet<AminoAcid>(tokens.length);
                for (String token : tokens) {
                    aminoAcids.add(AminoAcid.valueOf(token.trim()));
                }
            }

            // optional <spec-estimate-data> child: ';'-separated doubles
            SpecificityEstimatorImpl estimator = null;
            String estimateText = StringEscapeUtils.unescapeXml(modelElem.elementText("spec-estimate-data"));
            if (estimateText != null) {
                List<Double> trainingValues = new ArrayList<Double>();
                for (String token : estimateText.replaceAll("\n", "").split(";")) {
                    trainingValues.add(Double.valueOf(token.trim()));
                }
                estimator = new SpecificityEstimatorImpl(trainingValues);
            }

            // optional <comment> child; line breaks are stored as %EOL%
            String comment = StringEscapeUtils.unescapeXml(modelElem.elementText("comment"));
            if (comment != null) {
                comment = comment.replaceAll("%EOL%", "\n");
            }

            PredictionModelImpl model = new PredictionModelImpl.Builder()
                    .name(modelName)
                    .ptm(ptm)
                    .aminoAcids(aminoAcids)
                    .specEstimator(estimator)
                    .comment(comment)
                    .build();
            result.addModel(model);

            // prune the tree
            modelElem.detach();
        }
    };
    proteinsReader.addSaxReaderHandler(modelPath, modelHandler);

    proteinsReader.read(is);

    return result;
}

From source file:musite.io.xml.ProteinsXMLReader.java

License:Open Source License

/**
 * Creates a reader that adds the proteins it reads to the given collection.
 *
 * The constructor resets the field readers, field filter and handler map,
 * and registers a handler on <code>/protein-list/protein</code> (prefixed
 * with <code>/&lt;root&gt;</code> when <code>root</code> is non-null at this
 * point). For every protein element the handler stores each child element
 * under its qualified name, using a registered field reader when there is
 * one and the unescaped trimmed text otherwise.
 *
 * @param proteins collection to add proteins to; <code>nullData</code>
 *            records whether this was null
 */
public ProteinsXMLReader(Proteins proteins) {
    this.data = proteins;
    nullData = proteins == null;
    proteinFieldReaders = new HashMap();
    fieldFilter = null;
    saxReaderHandler = new HashMap();

    String proteinPath = (root == null)
            ? "/protein-list/protein"
            : "/" + root + "/protein-list/protein";

    ElementHandler proteinHandler = new ElementHandler() {
        public void onStart(ElementPath elementPath) {
        }

        public void onEnd(ElementPath elementPath) {
            ProteinImpl protein = new ProteinImpl();

            Element proteinElem = elementPath.getCurrent();
            for (Iterator<Element> fields = proteinElem.elementIterator(); fields.hasNext();) {
                Element field = (Element) fields.next();
                String fieldName = field.getQualifiedName();

                // skip fields rejected by the include/exclude filter
                boolean filteredOut = fieldFilter != null
                        && fieldFilterInclude != fieldFilter.contains(fieldName);
                if (filteredOut) {
                    continue;
                }

                Reader fieldReader = proteinFieldReaders.get(fieldName);
                Object value;
                if (fieldReader == null) {
                    value = StringEscapeUtils.unescapeXml(field.getTextTrim());
                } else {
                    try {
                        String content = nodeContentToString(field);
                        InputStream contentStream = StringUtil.toStream(content);
                        value = fieldReader.read(contentStream);
                        contentStream.close();
                    } catch (IOException e) {
                        // a field that cannot be read is reported and left out
                        e.printStackTrace();
                        continue;
                    }
                }

                protein.putInfo(fieldName, value);
            }

            // NOTE(review): data is null here if the constructor was given
            // null; presumably it is replaced before reading - confirm.
            if (proteinFilter == null || proteinFilter.filter(protein)) {
                data.addProtein(protein);
            }

            // progress output every 1000 proteins
            int total = data.proteinCount();
            if (total % 1000 == 0) {
                System.out.println(total);
            }

            // prune the tree
            proteinElem.detach();
        }
    };
    addSaxReaderHandler(proteinPath, proteinHandler);
}

From source file:musite.io.xml.UniProtXMLReader.java

License:Open Source License

/**
 * Reads UniProt XML from the given stream and collects the proteins it
 * describes.
 *
 * Handlers on the children of <code>/uniprot/entry</code> gather the first
 * accession, name, recommended full name, scientific organism name and
 * sequence of the current entry, plus one site record per accepted
 * <code>feature</code> element. When the entry ends, a protein is added to
 * the result if it has an organism (accepted by <code>organismFilter</code>
 * when one is set), an accession and a sequence; its sites are then
 * annotated through <code>PTMAnnotationUtil.annotate</code>. Every entry is
 * detached from the tree once handled.
 *
 * @param is stream to read from; must not be null. It is wrapped in a
 *            buffered stream and not closed by this method.
 * @return the <code>data</code> field when it is set, otherwise a new
 *         collection, filled with the proteins read
 * @throws IllegalArgumentException if <code>is</code> is null
 * @throws IOException if the document cannot be read; the underlying
 *             DocumentException is attached as the cause
 */
public Proteins read(InputStream is) throws IOException {
    if (is == null) {
        throw new IllegalArgumentException();
    }

    final Proteins result = data == null ? new ProteinsImpl() : data;

    SAXReader saxReader = new SAXReader();

    // Per-entry accumulators: filled by the child handlers below and reset
    // whenever a new entry starts.
    final StringBuilder acc = new StringBuilder(30);
    final StringBuilder name = new StringBuilder(30);
    final StringBuilder fullName = new StringBuilder(200);
    final StringBuilder org = new StringBuilder(30);
    final StringBuilder seq = new StringBuilder(2000);
    // each site record holds, in order: location, ptm, enzyme, annotation
    final List<List> sites = new ArrayList<List>(4);
    // accessions of the current entry other than the first one
    final Set<String> accs = new HashSet<String>();

    // entry
    saxReader.addHandler("/uniprot/entry", new ElementHandler() {
        public void onStart(ElementPath path) {
            acc.setLength(0);
            fullName.setLength(0);
            seq.setLength(0);
            org.setLength(0);
            name.setLength(0);
            sites.clear();
            accs.clear();
        }

        public void onEnd(ElementPath path) {
            // process an element
            if (org.length() > 0 && (organismFilter == null || organismFilter.contains(org.toString()))
                    && acc.length() > 0 && seq.length() > 0) {
                String accession = acc.toString();
                String sequence = seq.toString();

                ProteinImpl protein = new ProteinImpl(accession, sequence,
                        name.length() == 0 ? null : name.toString(),
                        fullName.length() == 0 ? null : fullName.toString(),
                        org.length() == 0 ? null : org.toString());
                result.addProtein(protein);

                for (List l : sites) {
                    Integer site = (Integer) l.get(0);
                    PTM ptm = (PTM) l.get(1);
                    String enzyme = (String) l.get(2);
                    // an autocatalytic site is attributed to the entry itself
                    if (enzyme != null && enzyme.equalsIgnoreCase("autocatalysis")) {
                        enzyme = name.toString();
                    }

                    Map ann = (Map) l.get(3);
                    try {
                        PTMAnnotationUtil.annotate(protein, site, ptm, enzyme, ann);
                    } catch (Exception e) {
                        // a site that cannot be annotated is reported and skipped
                        e.printStackTrace();
                    }
                }

                if (keepAllIds) {
                    for (String ac : accs) {
                        mapIdMainId.put(ac, accession);
                    }
                    if (!accs.isEmpty()) {
                        protein.putInfo("other-accessions", new HashSet<String>(accs));
                    }
                }
            }

            // prune the tree
            Element row = path.getCurrent();
            row.detach();
        }
    });

    // accession: the first one is the main accession, later ones are kept
    // only when keepAllIds is set
    saxReader.addHandler("/uniprot/entry/accession", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (acc.length() == 0) {
                Element el = path.getCurrent();
                acc.append(el.getText());
            } else {
                if (keepAllIds) {
                    accs.add(path.getCurrent().getText());
                }
            }
        }
    });

    // name: only the first one counts
    saxReader.addHandler("/uniprot/entry/name", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (name.length() > 0) {
                return;
            }

            Element el = path.getCurrent();
            name.append(el.getText());
        }
    });

    // full name: only the first one counts
    saxReader.addHandler("/uniprot/entry/protein/recommendedName/fullName", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (fullName.length() > 0) {
                return;
            }

            Element el = path.getCurrent();
            fullName.append(el.getTextTrim());
        }
    });

    // organism: the first name whose type is "scientific"
    saxReader.addHandler("/uniprot/entry/organism/name", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (org.length() > 0) {
                return;
            }

            Element el = path.getCurrent();
            String attr = el.attributeValue("type");
            if (attr == null || !attr.equalsIgnoreCase("scientific")) {
                return;
            }

            org.append(el.getText());
        }
    });

    // sequence: only the first one counts, with all whitespace removed
    saxReader.addHandler("/uniprot/entry/sequence", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            if (seq.length() > 0) {
                return;
            }

            Element el = path.getCurrent();
            seq.append(el.getText().replaceAll("\\p{Space}", ""));
        }
    });

    // feature: turned into a site record when it names a known PTM
    saxReader.addHandler("/uniprot/entry/feature", new ElementHandler() {
        public void onStart(ElementPath path) {
            // do nothing
        }

        public void onEnd(ElementPath path) {
            Element el = path.getCurrent();
            String type = el.attributeValue("type");
            if (type == null) {
                return;
            }

            PTM ptm = null;
            String enzyme = null;
            String description = null;
            String keyword = null;

            // Lower-case with a fixed locale so the lookup does not depend
            // on the default locale of the JVM.
            if (UNIPROT_TYPES.contains(type.toLowerCase(java.util.Locale.ENGLISH))) {
                description = el.attributeValue("description");
                if (description == null) {
                    return;
                }

                // The description is a "; "-separated list: a part that is a
                // PTM keyword selects the PTM, a part starting with "by "
                // names the enzyme.
                String[] descs = description.split("; ");
                for (String desc : descs) {
                    PTM tmp = PTM.ofKeyword(desc);
                    if (tmp != null) {
                        ptm = tmp;
                        keyword = desc;
                    } else if (desc.startsWith("by ")) {
                        enzyme = desc.substring(3);
                    }
                }
            }

            if (ptm == null || (ptmFilter != null && !ptmFilter.contains(ptm))) {
                return;
            }

            String status = el.attributeValue("status");
            if (status != null) {
                if (!includeBySimilarity && status.equalsIgnoreCase("By similarity")) {
                    return;
                }
                if (!includeProbable && status.equalsIgnoreCase("Probable")) {
                    return;
                }
                if (!includePotential && status.equalsIgnoreCase("Potential")) {
                    return;
                }
            }

            // -1 means "no usable position found"; when several positions
            // parse, the last one wins
            int site = -1;

            List<Element> locs = el.elements("location");
            for (Element loc : locs) {
                List<Element> poss = loc.elements("position");
                for (Element pos : poss) {
                    String str = pos.attributeValue("position");
                    if (str == null) {
                        continue;
                    }

                    try {
                        site = Integer.parseInt(str) - 1; // start from 0
                    } catch (NumberFormatException e) {
                        continue;
                    }
                }
            }

            if (site != -1) {
                List<Object> l = new ArrayList<Object>();
                l.add(site);
                l.add(ptm);
                l.add(enzyme);
                Map<String, Object> m = new HashMap<String, Object>();
                if (keyword != null) {
                    m.put("keyword", keyword);
                }
                if (description != null) {
                    m.put("description", description);
                }
                if (status != null) {
                    m.put("status", status);
                }
                l.add(m);
                sites.add(l);
            }
        }
    });

    BufferedInputStream bis = new BufferedInputStream(is);

    try {
        saxReader.read(bis);
    } catch (DocumentException e) {
        // Keep the parser failure as the cause so its stack trace and
        // location details are not lost to the caller.
        IOException wrapped = new IOException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    }

    return result;
}