Example usage for org.jdom2 Element getText

Introduction

This page collects example usages of the org.jdom2 Element getText method.

Prototype

public String getText()

Source Link

Document

Returns the textual content directly held under this element as a string.

Usage

From source file:Enrichissement.Jaccard.java

/**
 * Compares the object part of two triplet elements.
 *
 * <p>The object is looked up as a {@code uri} child first and, when neither
 * triplet has one, as a {@code literal} child:
 * <ul>
 *   <li>if only one triplet has a {@code uri} child, the objects differ;</li>
 *   <li>if both have one, the objects match when the two uri texts are equal;</li>
 *   <li>if both have a {@code literal} child and both carry {@code xml:lang},
 *       the language and the text must be equal;</li>
 *   <li>otherwise, if both literals carry a {@code datatype} attribute, the
 *       datatype and the text must be equal;</li>
 *   <li>in every remaining case the objects are reported as matching.</li>
 * </ul>
 *
 * @param triplet1 first triplet element
 * @param triplet2 second triplet element
 * @return {@code false} when one of the checks above finds a difference,
 *         {@code true} otherwise
 */
private static boolean comparerObjet(Element triplet1, Element triplet2) {
    Element uri1 = triplet1.getChild("uri");
    Element uri2 = triplet2.getChild("uri");

    // Exactly one side has a <uri> child: the objects cannot match.
    if ((uri1 == null) != (uri2 == null)) {
        return false;
    }
    if (uri1 != null) {
        return uri1.getText().equals(uri2.getText());
    }

    Element literal1 = triplet1.getChild("literal");
    // Bug fix: this used to be read from triplet1 as well, so the literal of
    // the first triplet was compared with itself and never reported a difference.
    Element literal2 = triplet2.getChild("literal");
    if (literal1 == null || literal2 == null) {
        // NOTE(review): a literal present on one side only is still treated as
        // a match, as before -- confirm this leniency is intended.
        return true;
    }

    String lang1 = literal1.getAttributeValue("lang", Namespace.XML_NAMESPACE);
    String lang2 = literal2.getAttributeValue("lang", Namespace.XML_NAMESPACE);
    String datatype1 = literal1.getAttributeValue("datatype");
    String datatype2 = literal2.getAttributeValue("datatype");
    String literal1Text = literal1.getText();
    String literal2Text = literal2.getText();

    if (lang1 != null && lang2 != null) {
        return lang1.equals(lang2) && literal1Text.equals(literal2Text);
    }
    if (datatype1 != null && datatype2 != null) {
        return datatype1.equals(datatype2) && literal1Text.equals(literal2Text);
    }
    // NOTE(review): literals without a shared lang/datatype are not compared
    // by text (unchanged behaviour) -- verify against the callers' expectations.
    return true;
}

From source file:es.ucm.fdi.ac.Annotation.java

License:Open Source License

/**
 * Populates this annotation from an XML element.
 *
 * <p>Reads the {@code labels}, {@code author}, {@code target},
 * {@code localFile}, {@code targetFile} and {@code date} attributes and uses
 * the element's trimmed text as the commentary. Trailing {@code '/'}
 * characters are removed from both file attributes. The date is only parsed
 * (and the {@code date} field only updated) when the attribute is present.
 *
 * @param element the element to read from
 * @throws IOException if the {@code date} attribute cannot be parsed
 */
public void loadFromXML(Element element) throws IOException {
    String labelsAttribute = element.getAttributeValue("labels");
    labels.clear();
    // A missing "labels" attribute used to fail with a NullPointerException;
    // it is now handled like the other optional attributes (no labels).
    if (labelsAttribute != null) {
        for (String labelString : labelsAttribute.split(" ")) {
            // Locale.ROOT keeps the lower-casing independent of the default
            // locale (e.g. Turkish dotless i), so the lookup stays stable.
            labels.add(Label.valueOf(labelString.toLowerCase(java.util.Locale.ROOT)));
        }
    }

    author = element.getAttributeValue("author");
    target = element.getAttributeValue("target");

    localFile = stripTrailingSlashes(element.getAttributeValue("localFile"));
    targetFile = stripTrailingSlashes(element.getAttributeValue("targetFile"));

    String dateAttribute = element.getAttributeValue("date");
    if (dateAttribute != null) {
        try {
            date = dateFormat.parse(dateAttribute);
        } catch (ParseException ex) {
            throw new IOException("Error parsing annotation date", ex);
        }
    }

    String text = element.getText();
    if (text != null) {
        commentary = text.trim();
    }
}

/** Returns {@code path} without any trailing {@code '/'} characters; {@code null} stays {@code null}. */
private static String stripTrailingSlashes(String path) {
    if (path == null) {
        return null;
    }
    int end = path.length();
    while (end > 0 && path.charAt(end - 1) == '/') {
        end--;
    }
    return path.substring(0, end);
}

From source file:es.ucm.fdi.ac.extract.PatternFilter.java

License:Open Source License

/**
 * Loads this filter's pattern from the given XML element: the element's own
 * text, with leading and trailing whitespace removed, is passed to
 * {@code setPattern}.
 *
 * @param element the element whose text holds the pattern
 * @throws IOException declared by the signature; not thrown directly here
 */
public void loadFromXML(Element element) throws IOException {
    final String rawPattern = element.getText();
    final String trimmedPattern = rawPattern.trim();
    setPattern(trimmedPattern);
}

From source file:es.upm.dit.xsdinferencer.extraction.extractorImpl.TypesExtractorImpl.java

License:Apache License

/**
 * Recursive method that traverses an element to extract all the possible information from it.
 * It is recursive because it calls itself for each child of the element (obviously, infinite recursion 
 * is not possible as there are not, or there should not be, parent-child loops).  
 * The index of the current document is necessary in order to add well some information to 
 * the statistics./*w  w  w.  j ava 2 s .  co  m*/
 * @param documentIndex index of current document
 * @param element the element to traverse (as a JDOM2 {@link Element})
 * @param enclosingComplexType the complex type which will contain the current element
 */
private void traverseElement(int documentIndex, Element element, String enclosingComplexType) {
    //Elements in the XSI namespace should be ignored
    if (element.getNamespaceURI().equalsIgnoreCase(XSI_NAMESPACE_URI))
        return;
    List<String> realPathUnfiltered = getRealPathOfElementUnfiltered(element, configuration, false,
            solvedNamespaceToPrefixMapping);
    String realPathFiltered = filterAndJoinRealPath(realPathUnfiltered);//Path for the statistics
    List<String> typePathUnfiltered = getRealPathOfElementUnfiltered(element, configuration, false,
            solvedNamespaceToPrefixMapping);
    List<String> suitablePath = getSuitablePath(typePathUnfiltered);//Path for type name inferencing
    //First, we will register the information of width and depth
    //The root is in a level whose width is 1, if we did not do the following, that width would be never registered
    if (element.isRootElement()) {
        statistics.registerWidth(documentIndex, 1);
    }
    statistics.registerDepth(documentIndex, realPathUnfiltered.size());
    int width = element.getChildren().size();
    if (width > 0) {
        statistics.registerWidth(documentIndex, width);
    }
    TypeNameInferencer typeNameInferencer = configuration.getTypeNameInferencer();
    String complexTypeName = typeNameInferencer.inferTypeName(suitablePath, configuration);//Complex type of this element
    //      //Little workaround that ensures that the same complex type is used 
    //      //when the elements on its path are the same (same name and namespace) but some of them 
    //      //use different namespace prefixes
    //      List<String> realPathUnfilteredKey=getRealPathOfElementUnfiltered(element, configuration, false, solvedNamespaceToPrefixMapping);
    //      List<String> suitablePathKey=getSuitablePath(realPathUnfilteredKey);//Path for type name inferencing
    //      String complexTypeNameKey = typeNameInferencer.inferTypeName(suitablePathKey, configuration);//Complex type of this element
    String complexTypeNameKey = complexTypeName;
    //The complex type object of this element.
    ComplexType complexType = complexTypes.get(complexTypeNameKey);
    if (complexType == null) {
        complexType = new ComplexType(complexTypeName, null, null, null);
        complexTypes.put(complexTypeNameKey, complexType); //New complex type
    }
    complexType.addSourceNodeNamespaceAndName(element.getNamespaceURI(), element.getName());
    //Comment processing
    for (Comment comment : element.getDescendants(Filters.comment())) {
        if (comment.getParentElement().equals(element))
            complexType.getComments().add(comment.getText());
    }

    //Key to find the corresponding SchemaElement
    //This key is: if the SchemaElement has an enclosing complex type (i.e., it is not a valid root), its name will be:
    //enclosingComplexType+typeNamesSeparator+elementName
    //If the element is a suitable root, the key is the name of the element.
    String schemaElementKey = (!enclosingComplexType.equals(""))
            ? enclosingComplexType + configuration.getTypeNamesAncestorsSeparator() + element.getName()
            : element.getName();
    if (configuration.getTypeNameInferencer() instanceof NameTypeNameInferencer) {
        schemaElementKey = element.getName(); //If we use a name-based type inferencer, the key is the name and we avoid problems.
    }
    SchemaElement schemaElement = elements.get(element.getNamespaceURI(), schemaElementKey);
    if (schemaElement == null) {
        schemaElement = new SchemaElement(element.getName(), element.getNamespaceURI(), complexType);//Complex type already not known.
        elements.put(element.getNamespaceURI(), schemaElementKey, schemaElement);
    }
    boolean wasAlreadyValidRoot = schemaElement.isValidRoot();
    schemaElement.setValidRoot(wasAlreadyValidRoot || element.isRootElement());

    ComplexTypeStatisticsEntry complexTypeStatisticsEntry = statistics.getComplexTypeInfo().get(complexType);
    if (complexTypeStatisticsEntry == null) {
        complexTypeStatisticsEntry = new ComplexTypeStatisticsEntry(xmlDocuments.size());
        statistics.getComplexTypeInfo().put(complexType, complexTypeStatisticsEntry);
    }

    AttributeListInferencer attributeListInferencer = attributeListInferencers.get(complexTypeName);

    if (attributeListInferencer == null) {
        attributeListInferencer = inferencersFactory.getAttributeListInferencerInstance(complexTypeName,
                configuration, solvedNamespaceToPrefixMapping, statistics);
        attributeListInferencers.put(complexTypeName, attributeListInferencer);
    }
    attributeListInferencer.learnAttributeList(element.getAttributes(), documentIndex);

    SimpleTypeInferencer simpleTypeInferencer = simpleTypeInferencersOfComplexTypes.get(complexTypeName);
    if (simpleTypeInferencer == null) {
        simpleTypeInferencer = inferencersFactory.getSimpleTypeInferencerInstance(complexTypeName,
                configuration);
        simpleTypeInferencersOfComplexTypes.put(complexTypeName, simpleTypeInferencer);
    }
    simpleTypeInferencer.learnValue(element.getText(), element.getNamespaceURI(), element.getName());

    //      SchemaElement previousChildSchemaElement=null; //We need to store the previous child in order to add the edge between it and the current child.
    List<SchemaElement> schemaElementChildren = new ArrayList<>(element.getChildren().size());
    for (int i = 0; i < element.getChildren().size(); i++) {
        Element child = element.getChildren().get(i);
        traverseElement(documentIndex, child, complexTypeName);
        String childSchemaElementKey = complexTypeName + configuration.getTypeNamesAncestorsSeparator()
                + child.getName();
        if (configuration.getTypeNameInferencer() instanceof NameTypeNameInferencer) {
            childSchemaElementKey = child.getName(); // If we use the name-based type name inferencer, the name is the key
        }
        SchemaElement childSchemaElement = elements.get(child.getNamespaceURI(), childSchemaElementKey);//The SchemaElement object does exist because the method traverseElement is called before this.
        //         if(i==0){
        //            automaton.addEdge(automaton.getInitialState(), childSchemaElement);
        //         }
        //         else {
        //            automaton.addEdge(previousChildSchemaElement, childSchemaElement);
        //            if(i==(element.getChildren().size()-1)){
        //               automaton.addEdge(childSchemaElement, automaton.getFinalState());
        //            }
        //         }
        complexTypeStatisticsEntry.registerElementCount(childSchemaElement, documentIndex);
        schemaElementChildren.add(childSchemaElement);
        //         previousChildSchemaElement=childSchemaElement;
    }

    ExtendedAutomaton automaton = automatons.get(complexTypeName);
    if (automaton == null) {
        automaton = new ExtendedAutomaton();
        SchemaElement initialState = new SchemaElement("initial", DEFAULT_PSEUDOELEMENTS_NAMESPACE, null);
        automaton.setInitialState(initialState);
        SchemaElement finalState = new SchemaElement("final", DEFAULT_PSEUDOELEMENTS_NAMESPACE, null);
        automaton.setFinalState(finalState);
        automatons.put(complexTypeName, automaton);
    }

    List<SchemaElement> schemaElementChildrenWithInitialAndFinal = new ArrayList<>(schemaElementChildren);
    schemaElementChildrenWithInitialAndFinal.add(0, automaton.getInitialState());
    schemaElementChildrenWithInitialAndFinal.add(automaton.getFinalState());
    automaton.learn(schemaElementChildrenWithInitialAndFinal);

    complexTypeStatisticsEntry.registerSubpatternsFromList(schemaElementChildren);
    complexTypeStatisticsEntry.registerValueOfNodeCount(element.getText(), schemaElement, documentIndex);

    statistics.registerElementAtPathCount(realPathFiltered, documentIndex);
    statistics.registerValueAtPathCount(realPathFiltered, element.getText(), documentIndex);
    if (enclosingComplexType.equals("")) {
        statistics.registerRootElementOccurrence(schemaElement);
    }
}

From source file:eu.himeros.cophi.ocr.proofreader.controller.pojo.OcrPageParser.java

License:Open Source License

/**
 * Maps an OCR alternative element onto a new {@link Insertion}.
 * The element's text becomes the insertion text and its {@code title}
 * attribute value becomes the insertion's nlp value.
 *
 * @param ocrAlternativeEl the OCR alternative element to read
 * @return the populated Insertion
 */
private Insertion parseAlternativeInsertion(Element ocrAlternativeEl) {
    final Insertion insertion = new Insertion();
    final String text = ocrAlternativeEl.getText();
    insertion.setText(text);
    final String nlp = ocrAlternativeEl.getAttributeValue("title");
    insertion.setNlp(nlp);
    return insertion;
}

From source file:eu.himeros.cophi.ocr.proofreader.controller.pojo.OcrPageParser.java

License:Open Source License

/**
 * Parses an ocr alternative element adn maps it on a Deletion.
 * @param ocrAlternativeEl the ocr alternative element.
 * @return the Deletion./* ww  w  .  j a  v  a 2 s  .c  o m*/
 */
private Deletion parseAlternativeDeletion(Element ocrAlternativeEl) {
    Deletion alternativeDeletion = new Deletion();
    alternativeDeletion.setText(ocrAlternativeEl.getText());
    alternativeDeletion.setNlp(ocrAlternativeEl.getAttributeValue("title"));
    return alternativeDeletion;
}

From source file:eu.himeros.hocr.GrcContextFilterMananger.java

License:Open Source License

/**
 * Removes the head of the queue and re-evaluates the element that is now at
 * the head, using the removed element and the element at index 1 as context.
 *
 * <p>Only a {@code span} child whose {@code class} is {@code UCWORD} is
 * touched. When the filtered suggestions contain several space-separated
 * entries they are stored in the span's {@code title}; a single non-empty
 * suggestion replaces the span text, the old text moves to {@code title}
 * and the class becomes {@code CORRWORD}. Any exception is printed to
 * {@code System.err} and otherwise ignored.
 */
@Override
public void adjustPreviousSuitableElement() {
    Element previous = queue.poll();
    Element current = queue.peek();
    Element following = queue.get(1);
    try {
        Element previousSpan = previous.getChild("span", previous.getNamespace());
        Element currentSpan = current.getChild("span", current.getNamespace());
        Element followingSpan = following.getChild("span", following.getNamespace());

        boolean isUcWord = currentSpan != null
                && "UCWORD".equals(currentSpan.getAttributeValue("class"));
        if (!isUcWord) {
            return;
        }

        String suggestions;
        try {
            suggestions = filterSuggestions(currentSpan.getText(), previousSpan.getText(),
                    followingSpan.getText(), currentSpan.getAttributeValue("title"));
        } catch (NullPointerException npex) {
            // Best effort: a NullPointerException here (e.g. a neighbour without
            // a span child) simply means "no suggestions".
            suggestions = "";
        }

        if (suggestions.trim().contains(" ")) {
            // Several candidates: keep them all as the title.
            currentSpan.setAttribute("title", suggestions);
        } else if (!suggestions.isEmpty()) {
            // One candidate: swap it in and keep the original text as the title.
            currentSpan.setAttribute("class", "CORRWORD");
            currentSpan.setAttribute("title", currentSpan.getText());
            currentSpan.setText(suggestions);
        }
    } catch (Exception ex) {
        ex.printStackTrace(System.err);
    }
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Processes one OCR word element: normalises its text, wraps it in an info
 * {@code span} carrying id, upper-case form and classification, and hands the
 * word to the {@code l1Fm} filter manager.
 *
 * <p>A word whose adjusted text ends with {@code "-"} is not processed
 * immediately: it receives an {@code idx} attribute, is remembered in
 * {@code hyphenPart1} and the method returns. On the next call the remembered
 * part is joined with the current word (via {@code parseOcrHyphenatedWord})
 * for classification, and afterwards the remembered element gets its own
 * span with the same class.
 *
 * @param ocrWord the OCR word element to process; it is modified in place
 */
private void parseOcrWord(Element ocrWord) {
    String text = ocrWord.getText();
    // Normalise, then apply the two named adjustment steps.
    text = adjuster.adjust(new String[] { "monotonic2polytonic", "ocr2u" }, normalizer2.normalize(text));
    String upText = low2upL1Trans.parse(text);
    if (text.endsWith("-")) {
        // First half of a hyphenated word: tag it, remember it and wait for the next word.
        ocrWord.setAttribute("idx", "" + id++);
        hyphenPart1 = ocrWord;
        return;
    } else if (hyphenPart1 != null) {
        // Second half: classification below works on the joined word.
        text = adjuster.adjust(new String[] { "monotonic2polytonic", "ocr2u" },
                normalizer2.normalize(parseOcrHyphenatedWord(hyphenPart1, ocrWord)));
        upText = low2upL1Trans.parse(text);
    }
    // The span text is always built from this element's own text, not the joined word.
    Element infoSpan = new Element("span", xmlns);
    infoSpan.setText(adjuster.adjust(new String[] { "monotonic2polytonic", "ocr2u" },
            normalizer2.normalize(ocrWord.getText())));
    upText = upText.replaceAll(l1NonAlphabeticFilter, "");
    infoSpan.setAttribute("id", "" + id++);
    // Occurrence counter for the upper-case form: 1 on first sight, otherwise previous + 1.
    Integer occ;
    occ = ((occ = occHm.get(upText)) == null ? 1 : ++occ);
    occHm.put(upText, occ);
    infoSpan.setAttribute("uc", upText);
    try {
        // Remove the first content node of the word before the span is added.
        ocrWord.getContent(0).detach();
    } catch (Exception ex) {
        // Deliberately ignored: e.g. the element has no content at index 0.
    }
    Token token = new Token(text);
    token = setClassiFicationAndScore(token);
    infoSpan = setInfoSpanClass(token, infoSpan);
    ocrWord.addContent(infoSpan);
    l1Fm.addSuitableElement(ocrWord);
    l1Fm.adjustPreviousSuitableElement();
    if (hyphenPart1 != null) {
        // Give the remembered first half a span with the same class as the second half.
        text = hyphenPart1.getText();
        hyphenPart1.getContent(0).detach();
        Element infoSpan1 = new Element("span", xmlns);
        infoSpan1.setAttribute("class", infoSpan.getAttributeValue("class"));
        infoSpan1.setText(text);
        hyphenPart1.addContent(infoSpan1);
        hyphenPart1 = null; //TODO: ???
    }
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Joins the two halves of a hyphenated word: the text of {@code part1}
 * without its last character (the hyphen) followed by the text of
 * {@code part2}.
 *
 * @param part1 element holding the first half, whose text ends with the hyphen
 * @param part2 element holding the second half
 * @return the joined text, or the empty string when either element is
 *         {@code null} or {@code part1} has no text to strip
 */
private String parseOcrHyphenatedWord(Element part1, Element part2) {
    // Explicit checks replace the former catch-all, which returned "" for
    // exactly these inputs but also hid any unrelated failure.
    if (part1 == null || part2 == null) {
        return "";
    }
    String firstHalf = part1.getText();
    if (firstHalf == null || firstHalf.isEmpty()) {
        return "";
    }
    return firstHalf.substring(0, firstHalf.length() - 1) + part2.getText();
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Rewrites every XHTML {@code span} that carries an {@code id} or {@code idx}
 * attribute into an "alternatives" structure.
 *
 * <p>For each matched span (or its first child span when the match carries
 * {@code idx}):
 * <ul>
 *   <li>the parent element gets a leading {@code id="w_N"} attribute;</li>
 *   <li>the span content is replaced by an {@code ins} element with class
 *       {@code alt}, holding the former span text;</li>
 *   <li>the working attributes ({@code uc}, {@code occ}, {@code title},
 *       {@code anchor}, {@code anchor-id}, {@code id}, {@code idx},
 *       {@code whole}) are removed;</li>
 *   <li>each space-separated entry of the former {@code title} is appended as
 *       a {@code del} element with a decreasing {@code nlp} score starting
 *       at 0.90.</li>
 * </ul>
 */
private void makeCompliantHocr() {
    xpath = XPathFactory.instance().compile("//ns:span[@id|@idx]", Filters.element(), null,
            Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
    List<Element> elements = xpath.evaluate(root);
    int spanId = 0;
    for (Element match : elements) {
        Element span = match;
        if (match.getAttribute("idx") != null) {
            // Spans tagged with idx keep their data in the first child; fall back to
            // the span itself when it has no children (formerly a swallowed exception).
            List<Element> children = match.getChildren();
            if (!children.isEmpty()) {
                span = children.get(0);
            }
        }
        // Put the new word id in front of the parent's existing attributes.
        LinkedList<Attribute> attributeLl = new LinkedList<Attribute>(span.getParentElement().getAttributes());
        attributeLl.addFirst(new Attribute("id", "w_" + spanId++));
        span.getParentElement().setAttributes(attributeLl);

        String title = span.getAttributeValue("title");

        Element ins = new Element("ins", xmlns);
        ins.setAttribute("class", "alt");
        ins.setAttribute("title", makeNlp(span.getAttributeValue("class")));
        ins.setText(span.getText());
        span.removeContent();
        span.addContent(ins);
        span.setAttribute("class", "alternatives");
        span.removeAttribute("uc");
        span.removeAttribute("occ");
        span.removeAttribute("title");
        span.removeAttribute("anchor");
        span.removeAttribute("anchor-id");
        span.removeAttribute("id");
        span.getParentElement().removeAttribute("idx");
        span.removeAttribute("whole");
        span.getParentElement().removeAttribute("whole");
        if (title == null || "".equals(title)) {
            continue; // no suggestions to add
        }

        double score = 0.90;
        for (String suggestion : title.split(" ")) {
            if (suggestion == null || "".equals(suggestion)) {
                continue;
            }
            Element del = new Element("del", xmlns);
            // Locale.ROOT guarantees a '.' decimal separator whatever the default locale is.
            del.setAttribute("title", "nlp " + String.format(java.util.Locale.ROOT, "%.2f", score));
            score = score - 0.01;
            suggestion = suggestion.replaceAll(l1PunctMarkFilter, "");
            // Carry over punctuation matched on the left/right of the original text.
            Matcher leftMatcher = l1LeftPunctMarkPattern.matcher(ins.getText());
            if (leftMatcher.matches()) {
                suggestion = leftMatcher.group(1) + suggestion;
            }
            Matcher rightMatcher = l1RightPunctMarkPattern.matcher(ins.getText());
            if (rightMatcher.matches()) {
                // Keep a trailing U+261A marker after the re-attached punctuation.
                String ngtSymbol = "";
                if (suggestion.endsWith("\u261a")) {
                    ngtSymbol = "\u261a";
                    suggestion = suggestion.substring(0, suggestion.length() - 1);
                }
                suggestion = suggestion + rightMatcher.group(1) + ngtSymbol;
            }
            // NOTE(review): the original marked this branch with "///!!!!" -- when a
            // U+261A-marked suggestion is similar enough to the current text and the
            // grandparent declares xml:lang, the suggestion and the text are swapped.
            if (suggestion.endsWith("\u261a") && ins.getParentElement().getParentElement()
                    .getAttributeValue("lang", Namespace.XML_NAMESPACE) != null) {
                String buff = suggestion.substring(0, suggestion.length() - 1);
                sa.align(buff, ins.getText());
                double sim = 1 - sa.getEditDistance()
                        / Math.max((double) buff.length(), (double) ins.getText().length());
                if (sim > 0.6) {
                    suggestion = ins.getText() + "\u261b";
                    ins.setText(buff);
                    ins.setAttribute("title", "nlp 0.70");
                }
            }
            del.addContent(suggestion);
            span.addContent(del);
        }
    }
}