Example usage for org.jdom2 Element setAttribute

List of usage examples for org.jdom2 Element setAttribute

Introduction

On this page you can find example usages of org.jdom2 Element setAttribute.

Prototype

public Element setAttribute(final String name, final String value) 

Source Link

Document

This sets an attribute value for this element.

Usage

From source file:eu.himeros.digitaledition.AlignedQuotationParser.java

License:Open Source License

/**
 * Tokenizes one line of text on single spaces and appends a {@code <w>} element
 * to {@code rootOut} for every token that, after stripping newlines, tabs and
 * the punctuation marks {@code , ; .}, consists solely of characters in the
 * ranges U+0380..U+03FF and U+1F00..U+1FFF.
 *
 * <p>A non-matching token that ends with a hyphen is remembered (without the
 * hyphen) in {@code hyphenatedFirstPart} and is prepended to the next matching
 * token, so a word split across a line break is emitted as a single element.
 *
 * @param textLine the line to tokenize
 */
private void parseTextLine(String textLine) {
    for (String rawToken : textLine.split(" ")) {
        final String cleaned = rawToken.replaceAll("[\n\t,;.]+", "");
        if (!cleaned.matches("[\u0380-\u03FF\u1F00-\u1FFF]+")) {
            // Not a word we emit; it may still be the first half of a hyphenated word.
            if (cleaned.endsWith("-")) {
                hyphenatedFirstPart = cleaned.substring(0, cleaned.length() - 1);
            }
            continue;
        }
        String word = cleaned;
        if (hyphenatedFirstPart != null) {
            // Re-join the pending first half with its continuation.
            word = hyphenatedFirstPart + cleaned;
            hyphenatedFirstPart = null;
        }
        Element wordEl = new Element("w");
        wordEl.setAttribute("id", "" + id++);
        wordEl.setAttribute("text", word);
        // "uc" holds whatever upperTrans.parse returns (presumably an upper-case form).
        wordEl.setAttribute("uc", upperTrans.parse(word));
        rootOut.addContent(wordEl);
    }
}

From source file:eu.himeros.digitaledition.AlignedQuotationParser.java

License:Open Source License

/**
 * Annotates every child of {@code root} with an {@code occ} attribute.
 *
 * <p>{@code makeOccHm(root)} is invoked first; afterwards each child's
 * {@code uc} attribute value is looked up in {@code occHm} and the string form
 * of the mapped value is stored as {@code occ}. A child whose {@code uc} value
 * has no entry in {@code occHm} causes a {@link NullPointerException}.
 *
 * @param root the element whose children are annotated
 */
private void injectOcc(Element root) {
    makeOccHm(root);
    for (Element child : root.getChildren()) {
        String key = child.getAttributeValue("uc");
        child.setAttribute("occ", occHm.get(key).toString());
    }
}

From source file:eu.himeros.hocr.FlatXml.java

License:Open Source License

/**
 * Reads the XML/HTML document in {@code inFile}, rebuilds it under a new
 * {@code html} root in the XHTML namespace with a flattened page structure, and
 * writes the pretty-printed result to {@code outFile}.
 *
 * <p>The body's content is replaced by a single {@code div class="ocr_page"}
 * that holds clones of every line element found under the
 * {@code class='ocr_carea'} elements. The original area and paragraph wrappers
 * are kept only as XML comments.
 *
 * <p>A missing {@code head} or {@code body} element is tolerated: the
 * corresponding step is skipped instead of failing with a
 * {@link NullPointerException}.
 *
 * @param inFile  the document to read; its name (minus the first character, with
 *                ".html" replaced by ".png") becomes part of the page id
 * @param outFile the file to write the transformed document to
 * @throws Exception if parsing, transforming or writing fails
 */
private void init(File inFile, File outFile) throws Exception {
    SAXBuilder builder = new SAXBuilder();
    Document doc = builder.build(inFile);
    Element root = doc.getRootElement();
    Namespace oldns = root.getNamespace();
    Element newRoot = new Element("html", "http://www.w3.org/1999/xhtml");
    Namespace xmlns = newRoot.getNamespace();

    // Move <head> and its children into the XHTML namespace and add a <title>.
    Element head = root.getChild("head", oldns);
    if (head != null) {
        head.setNamespace(xmlns);
        for (Element child : head.getChildren()) {
            child.setNamespace(xmlns);
        }
        Element title = new Element("title", xmlns);
        title.addContent("ocr");
        head.addContent(title);
    }

    // The single page container that will replace the body's content.
    Element page = new Element("div", xmlns);
    page.setAttribute("class", "ocr_page");
    page.setAttribute("id", "i" + inFile.getName().substring(1).replace(".html", ".png"));

    Element body = root.getChild("body", oldns);
    if (body != null) {
        body.setNamespace(xmlns);
        XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@class='ocr_carea']",
                Filters.element(), null, Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
        List<Element> careaElL = xpath.evaluate(body);
        for (Element careaEl : careaElL) {
            // Area and paragraph wrappers survive only as comments around the lines.
            page.addContent(new Comment("<div class=\"" + careaEl.getAttributeValue("class") + "\" title=\""
                    + careaEl.getAttributeValue("title") + "\">"));
            for (Element pEl : careaEl.getChildren()) {
                page.addContent(new Comment("<p>"));
                for (Element lineEl : pEl.getChildren()) {
                    lineEl.removeAttribute("id");
                    lineEl.setNamespace(xmlns);
                    for (Element child : lineEl.getChildren()) {
                        child.removeAttribute("id");
                        child.removeAttribute("lang");
                        child.removeAttribute("lang", xmlns);
                        child.setNamespace(xmlns);
                    }
                    // Clone because the line is still attached to its original parent.
                    page.addContent(lineEl.clone());
                }
                page.addContent(new Comment("</p>"));
            }
            page.addContent(new Comment("</div>"));
        }
        body.removeContent();
        body.addContent(page);
    }

    // Re-parent everything under the new namespaced root.
    newRoot.addContent(root.removeContent());
    doc.detachRootElement();
    doc.setRootElement(newRoot);

    XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());
    // NOTE(review): FileWriter(File) encodes with the platform default charset;
    // confirm this matches the encoding declared in the emitted XML prolog.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(outFile))) {
        xmlOutputter.output(doc, writer);
    }
}

From source file:eu.himeros.hocr.GrcContextFilterMananger.java

License:Open Source License

/**
 * Removes the head of {@code queue} and uses it, together with the element
 * after the new head, as context to refine the suggestions of the new head.
 *
 * <p>Only a current element whose info {@code span} has class {@code UCWORD} is
 * touched. If the filtered suggestions (trimmed) still contain a space, they
 * replace the span's {@code title}. If they are a single non-empty value, the
 * span becomes a {@code CORRWORD}: its old text moves to {@code title} and the
 * suggestion becomes its text. A {@link NullPointerException} raised while
 * filtering leaves the span unchanged; any other failure is printed to
 * {@code System.err}.
 */
@Override
public void adjustPreviousSuitableElement() {
    Element previous = queue.poll();
    Element current = queue.peek();
    Element following = queue.get(1);
    try {
        Element previousSpan = previous.getChild("span", previous.getNamespace());
        Element currentSpan = current.getChild("span", current.getNamespace());
        Element followingSpan = following.getChild("span", following.getNamespace());

        boolean isUcWord = currentSpan != null && "UCWORD".equals(currentSpan.getAttributeValue("class"));
        if (!isUcWord) {
            return;
        }

        String filtered = "";
        try {
            filtered = filterSuggestions(currentSpan.getText(), previousSpan.getText(),
                    followingSpan.getText(), currentSpan.getAttributeValue("title"));
        } catch (NullPointerException npex) {
            // Missing context (e.g. a neighbour without a span): keep the empty default.
        }

        if (filtered.trim().contains(" ")) {
            // Several candidates remain: just narrow the suggestion list.
            currentSpan.setAttribute("title", filtered);
        } else if (!filtered.isEmpty()) {
            // Exactly one candidate: promote it to the text, keep the old text as title.
            currentSpan.setAttribute("class", "CORRWORD");
            currentSpan.setAttribute("title", currentSpan.getText());
            currentSpan.setText(filtered);
        }
    } catch (Exception ex) {
        ex.printStackTrace(System.err);
    }
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Processes one OCR word element: normalizes its text, counts the occurrence of
 * its upper-cased key in {@code occHm}, replaces the element's first content
 * node with a classified info {@code span}, and hands the element to the
 * context filter manager {@code l1Fm}.
 *
 * <p>A word whose adjusted text ends with "-" is not classified immediately: it
 * is tagged with an {@code idx} attribute and remembered in {@code hyphenPart1}.
 * When the next word arrives, the text used for classification is built by
 * {@code parseOcrHyphenatedWord(hyphenPart1, ocrWord)}, and the remembered
 * first part afterwards receives a span carrying the same class.
 *
 * @param ocrWord the word element to process; it is modified in place
 */
private void parseOcrWord(Element ocrWord) {
    String text = ocrWord.getText();
    text = adjuster.adjust(new String[] { "monotonic2polytonic", "ocr2u" }, normalizer2.normalize(text));
    String upText = low2upL1Trans.parse(text);
    if (text.endsWith("-")) {
        // First half of a hyphenated word: tag it and wait for the continuation.
        ocrWord.setAttribute("idx", "" + id++);
        hyphenPart1 = ocrWord;
        return;
    } else if (hyphenPart1 != null) {
        // Continuation of a hyphenated word: classify the joined text instead.
        text = adjuster.adjust(new String[] { "monotonic2polytonic", "ocr2u" },
                normalizer2.normalize(parseOcrHyphenatedWord(hyphenPart1, ocrWord)));
        upText = low2upL1Trans.parse(text);
    }
    Element infoSpan = new Element("span", xmlns);
    // The span shows this element's own (adjusted) text, not the joined text.
    infoSpan.setText(adjuster.adjust(new String[] { "monotonic2polytonic", "ocr2u" },
            normalizer2.normalize(ocrWord.getText())));
    upText = upText.replaceAll(l1NonAlphabeticFilter, "");
    // The same counter feeds both the "idx" attribute above and this "id".
    infoSpan.setAttribute("id", "" + id++);
    // Increment the occurrence count for this key, starting at 1.
    Integer occ;
    occ = ((occ = occHm.get(upText)) == null ? 1 : ++occ);
    occHm.put(upText, occ);
    infoSpan.setAttribute("uc", upText);
    try {
        // Drop the first content node so the span can take its place.
        ocrWord.getContent(0).detach();
    } catch (Exception ex) {
        // Best effort: failures (presumably an element without content) are ignored.
    }
    Token token = new Token(text);
    token = setClassiFicationAndScore(token);
    infoSpan = setInfoSpanClass(token, infoSpan);
    ocrWord.addContent(infoSpan);
    l1Fm.addSuitableElement(ocrWord);
    l1Fm.adjustPreviousSuitableElement();
    if (hyphenPart1 != null) {
        // Give the remembered first half a span with the class of the joined word.
        text = hyphenPart1.getText();
        hyphenPart1.getContent(0).detach();
        Element infoSpan1 = new Element("span", xmlns);
        infoSpan1.setAttribute("class", infoSpan.getAttributeValue("class"));
        infoSpan1.setText(text);
        hyphenPart1.addContent(infoSpan1);
        hyphenPart1 = null; //TODO: ???
    }
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Writes the token's classification into the {@code class} attribute of
 * {@code infoSpan} and, for every classification except {@code WORD}, also sets
 * a {@code title} attribute.
 *
 * <p>For {@code L2WORD} the title is the token's text ({@code makeSuggestions}
 * is still invoked, but its result is not used); for the remaining
 * classifications the title is the result of {@code makeSuggestions(token)}.
 * A classification not listed below leaves the span untouched.
 *
 * @param token    the classified token
 * @param infoSpan the span to annotate
 * @return the same {@code infoSpan} instance
 */
private Element setInfoSpanClass(Token token, Element infoSpan) {
    switch (token.getClassification()) {
    case WORD:
        infoSpan.setAttribute("class", "WORD");
        break;
    case UCWORD:
        markWithSuggestions(token, infoSpan, "UCWORD");
        break;
    case SYLLABICSEQ:
        markWithSuggestions(token, infoSpan, "SYLLABICSEQ");
        break;
    case CHARSEQ:
        markWithSuggestions(token, infoSpan, "CHARSEQ");
        break;
    case BADONE:
        markWithSuggestions(token, infoSpan, "BADONE");
        break;
    case BADMANY:
        markWithSuggestions(token, infoSpan, "BADMANY");
        break;
    case L2WORD:
        infoSpan.setAttribute("class", "L2WORD");
        // Called for its effect only; the title is the token's text.
        makeSuggestions(token);
        infoSpan.setAttribute("title", token.getText());
        break;
    }
    return infoSpan;
}

/** Sets {@code class} to {@code cssClass}, then {@code title} to the token's suggestions. */
private void markWithSuggestions(Token token, Element infoSpan, String cssClass) {
    infoSpan.setAttribute("class", cssClass);
    infoSpan.setAttribute("title", makeSuggestions(token));
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Adds occurrence and anchor information to every XHTML {@code span} under
 * {@code root} that has a non-empty {@code uc} attribute.
 *
 * <p>Each span receives an {@code occ} attribute holding the count stored in
 * {@code occHm} for its {@code uc} value. When that count is exactly 1, the
 * entry of {@code nearGtHm} for the same key supplies the {@code anchor} and
 * {@code anchor-id} attributes; for spans of class {@code CORRWORD} or
 * {@code UCWORD} the entry's {@code text} value, followed by U+261A and a space,
 * is prepended to the span's {@code title}. Any exception raised while
 * anchoring a span is ignored and processing continues with the next span.
 */
private void updateElements() {
    xpath = XPathFactory.instance().compile("//ns:span[@uc!='']", Filters.element(), null,
            Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
    for (Element span : xpath.evaluate(root)) {
        String key = span.getAttributeValue("uc");
        span.setAttribute("occ", "" + occHm.get(key));
        try {
            if (occHm.get(key) != 1) {
                continue;
            }
            span.setAttribute("anchor", nearGtHm.get(key).getAttributeValue("uc"));
            span.setAttribute("anchor-id", nearGtHm.get(key).getAttributeValue("id"));
            String cssClass = span.getAttributeValue("class");
            if ("CORRWORD".equals(cssClass) || "UCWORD".equals(cssClass)) {
                String oldTitle = span.getAttributeValue("title");
                span.setAttribute("title", nearGtHm.get(key).getAttributeValue("text") + "\u261a " + oldTitle);
            }
        } catch (Exception ignored) {
            // Missing map entries or attribute values: leave this span as it is.
        }
    }
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Rewrites every XHTML {@code span} under {@code root} that carries an
 * {@code id} or {@code idx} attribute into an "alternatives" structure: the
 * span's text moves into an {@code ins class="alt"} child and each suggestion
 * from the span's former {@code title} becomes a {@code del} child.
 *
 * <p>Along the way the span's parent receives a fresh {@code id} of the form
 * {@code w_N} as its first attribute, and the working attributes ({@code uc},
 * {@code occ}, {@code title}, {@code anchor}, {@code anchor-id}, {@code id},
 * {@code idx}, {@code whole}) are removed.
 */
private void makeCompliantHocr() {
    xpath = XPathFactory.instance().compile("//ns:span[@id|@idx]", Filters.element(), null,
            Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
    List<Element> elements = xpath.evaluate(root);
    int spanId = 0;
    for (Element span : elements) {
        if (span.getAttribute("idx") != null) {
            // An "idx" element wraps the real span; descend into it when possible.
            try {
                span = span.getChildren().get(0);
            } catch (Exception ex) {
                // Best effort: without a child, keep working on the element itself.
            }
        }
        // Put the new "id" first in the parent's attribute list.
        // The diamond keeps the list typed; the raw constructor caused an unchecked conversion.
        LinkedList<Attribute> attributeLl = new LinkedList<>(span.getParentElement().getAttributes());
        attributeLl.addFirst(new Attribute("id", "w_" + spanId++));
        span.getParentElement().setAttributes(attributeLl);
        String[] suggestions = null;
        String title = span.getAttributeValue("title");
        if (title != null) {
            suggestions = title.split(" ");
        }
        if (suggestions == null) {
            suggestions = new String[] { "" };
        }
        // The span's text moves into <ins class="alt">, titled via makeNlp(class).
        Element ins = new Element("ins", xmlns);
        ins.setAttribute("class", "alt");
        ins.setAttribute("title", makeNlp(span.getAttributeValue("class")));
        ins.setText(span.getText());
        span.removeContent();
        span.addContent(ins);
        span.setAttribute("class", "alternatives");
        // Strip the working attributes that must not appear in the output.
        span.removeAttribute("uc");
        span.removeAttribute("occ");
        span.removeAttribute("title");
        span.removeAttribute("anchor");
        span.removeAttribute("anchor-id");
        span.removeAttribute("id");
        span.getParentElement().removeAttribute("idx");
        span.removeAttribute("whole");
        span.getParentElement().removeAttribute("whole");
        if (title == null || "".equals(title)) {
            continue;
        }
        // Each suggestion gets a title score starting at 0.90 and dropping by 0.01.
        double score = 0.90;
        for (String suggestion : suggestions) {
            if (suggestion == null || "".equals(suggestion)) {
                continue;
            }
            Element del = new Element("del", xmlns);
            // String.format is locale-sensitive; force a dot as the decimal separator.
            del.setAttribute("title", "nlp " + String.format("%.2f", score).replaceAll(",", "."));
            score = score - 0.01;
            // Strip punctuation from the suggestion, then re-attach what the
            // left/right patterns capture from the OCR text.
            suggestion = suggestion.replaceAll(l1PunctMarkFilter, "");
            Matcher leftMatcher = l1LeftPunctMarkPattern.matcher(ins.getText());
            if (leftMatcher.matches()) {
                suggestion = leftMatcher.group(1) + suggestion;
            }
            Matcher rightMatcher = l1RightPunctMarkPattern.matcher(ins.getText());
            if (rightMatcher.matches()) {
                // Keep a trailing U+261A marker after the re-attached punctuation.
                String ngtSymbol = "";
                if (suggestion.endsWith("\u261a")) {
                    ngtSymbol = "\u261a";
                    suggestion = suggestion.substring(0, suggestion.length() - 1);
                }
                suggestion = suggestion + rightMatcher.group(1) + ngtSymbol;
            }
            // A U+261A-marked suggestion, when the span's parent has an xml:lang
            // attribute, is compared with the OCR text; if similar enough the two
            // are swapped.
            if (suggestion.endsWith("\u261a") && ins.getParentElement().getParentElement()
                    .getAttributeValue("lang", Namespace.XML_NAMESPACE) != null) {
                String buff = suggestion.substring(0, suggestion.length() - 1);
                sa.align(buff, ins.getText());
                double sim = 1 - sa.getEditDistance()
                        / Math.max((double) buff.length(), (double) ins.getText().length());
                if (sim > 0.6) {
                    // The OCR text becomes the alternative (marked with U+261B)
                    // and the suggestion becomes the displayed text.
                    suggestion = ins.getText() + "\u261b";
                    ins.setText(buff);
                    ins.setAttribute("title", "nlp 0.70");
                }
            }
            del.addContent(suggestion);
            span.addContent(del);
        }
    }
}

From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License:Apache License

/**
 * Converts a NAF document into the ABSA 2015/2016 XML layout
 * ({@code Reviews > Review > sentences > sentence > text + Opinions}).
 *
 * <p>Every opinion of a sentence becomes an {@code Opinion} element carrying the
 * target string, the category, the character span of the target and a fixed
 * polarity of {@code "na"}.
 *
 * @param inputNAF path of the NAF file to read
 * @return the pretty-printed ABSA XML document
 * @throws IOException if the NAF file cannot be read
 */
public static String nafToAbsa2015(String inputNAF) throws IOException {

    KAFDocument kaf = KAFDocument.createFromFile(Paths.get(inputNAF).toFile());
    Set<String> reviewIds = getReviewIdsFromXpathAttribute(kaf);

    // Root element shared by the ABSA 2015 and 2016 formats.
    Element reviewsElem = new Element("Reviews");
    Document doc = new Document(reviewsElem);

    for (String reviewId : reviewIds) {
        Element reviewElem = new Element("Review");
        reviewElem.setAttribute("rid", reviewId);
        Element sentencesElem = new Element("sentences");

        for (List<WF> sent : getSentencesByReview(kaf, reviewId)) {
            WF firstWf = sent.get(0);
            String sentId = firstWf.getXpath();
            Integer sentNumber = firstWf.getSent();

            // <sentence id="..."><text>...</text>
            Element sentenceElem = new Element("sentence");
            Element textElem = new Element("text");
            textElem.setText(NAFUtils.getSentenceStringFromWFs(sent));
            sentenceElem.setAttribute("id", sentId);
            sentenceElem.addContent(textElem);

            // <Opinions> is always emitted, even when the sentence has none.
            Element opinionsElem = new Element("Opinions");
            for (Opinion opinion : getOpinionsBySentence(kaf, sentNumber)) {
                String category = opinion.getOpinionExpression().getSentimentProductFeature();
                String targetString = opinion.getStr();
                // The span runs from the first word form of the first target term
                // to the end of the last word form of the last target term.
                int fromOffset = opinion.getOpinionTarget().getTerms().get(0).getWFs().get(0).getOffset();
                List<WF> targetWFs = opinion.getOpinionTarget().getTerms()
                        .get(opinion.getOpinionTarget().getTerms().size() - 1).getWFs();
                WF lastWf = targetWFs.get(targetWFs.size() - 1);
                int toOffset = lastWf.getOffset() + lastWf.getLength();

                Element opinionElem = new Element("Opinion");
                opinionElem.setAttribute("target", targetString);
                opinionElem.setAttribute("category", category);
                // TODO we still do not have polarity here
                opinionElem.setAttribute("polarity", "na");
                opinionElem.setAttribute("from", String.valueOf(fromOffset));
                opinionElem.setAttribute("to", String.valueOf(toOffset));
                opinionsElem.addContent(opinionElem);
            }
            sentenceElem.addContent(opinionsElem);
            sentencesElem.addContent(sentenceElem);
        }
        reviewElem.addContent(sentencesElem);
        reviewsElem.addContent(reviewElem);
    }

    return new XMLOutputter(Format.getPrettyFormat()).outputString(doc);
}

From source file:eus.ixa.ixa.pipe.convert.AbsaSemEval.java

License:Apache License

/**
 * Converts a NAF document into the ABSA 2014 XML layout
 * ({@code sentences > sentence > text + aspectTerms}).
 *
 * <p>Every opinion of a sentence becomes an {@code aspectTerm} element carrying
 * the target string, its character span and an empty polarity. The
 * {@code aspectTerms} element is only emitted for sentences that have at least
 * one opinion.
 *
 * @param kafDocument path of the NAF file to read
 * @return the pretty-printed ABSA XML document
 * @throws IllegalStateException if the NAF file cannot be read; the original
 *         {@link IOException} is attached as the cause
 */
public static String nafToAbsa2014(String kafDocument) {

    final KAFDocument kaf;
    try {
        Path kafPath = Paths.get(kafDocument);
        kaf = KAFDocument.createFromFile(kafPath.toFile());
    } catch (IOException e) {
        // Fail fast with the real cause: continuing with a null document could
        // only end in a NullPointerException a few lines further down.
        throw new IllegalStateException("Cannot read NAF document: " + kafDocument, e);
    }
    Element sentencesElem = new Element("sentences");
    Document doc = new Document(sentencesElem);

    for (List<WF> sent : kaf.getSentences()) {
        String sentId = sent.get(0).getXpath();
        Integer sentNumber = sent.get(0).getSent();

        // getting text element from WFs in NAF
        String textString = NAFUtils.getSentenceStringFromWFs(sent);
        Element sentenceElem = new Element("sentence");
        sentenceElem.setAttribute("id", sentId);
        Element textElem = new Element("text");
        textElem.setText(textString);
        sentenceElem.addContent(textElem);

        // creating opinions element for sentence
        List<Opinion> opinionsBySentence = getOpinionsBySentence(kaf, sentNumber);
        if (!opinionsBySentence.isEmpty()) {
            Element aspectTerms = new Element("aspectTerms");
            // getting opinion info from NAF Opinion layer
            for (Opinion opinion : opinionsBySentence) {
                // Polarity is not available here; an empty value is emitted.
                String polarity = "";
                String targetString = opinion.getStr();
                // The span runs from the first word form of the first target term
                // to the end of the last word form of the last target term.
                int fromOffset = opinion.getOpinionTarget().getTerms().get(0).getWFs().get(0).getOffset();
                List<WF> targetWFs = opinion.getOpinionTarget().getTerms()
                        .get(opinion.getOpinionTarget().getTerms().size() - 1).getWFs();
                int toOffset = targetWFs.get(targetWFs.size() - 1).getOffset()
                        + targetWFs.get(targetWFs.size() - 1).getLength();

                Element aspectTerm = new Element("aspectTerm");
                aspectTerm.setAttribute("term", targetString);
                aspectTerm.setAttribute("polarity", polarity);
                aspectTerm.setAttribute("from", Integer.toString(fromOffset));
                aspectTerm.setAttribute("to", Integer.toString(toOffset));
                aspectTerms.addContent(aspectTerm);
            }
            sentenceElem.addContent(aspectTerms);
        }
        sentencesElem.addContent(sentenceElem);
    }
    XMLOutputter xmlOutput = new XMLOutputter();
    Format format = Format.getPrettyFormat();
    xmlOutput.setFormat(format);
    return xmlOutput.outputString(doc);
}