Example usage for org.jdom2 Element removeContent

Introduction

On this page you can find example usages of org.jdom2 Element removeContent.

Prototype

@Override
public List<Content> removeContent()

Source Link

Document

Removes all child content from this parent.

Usage

From source file:at.ac.tuwien.ims.latex2mobiformulaconv.converter.mathml2html.FormulaConverter.java

License:Open Source License

/**
 * Replaces all formulas with the html representation of the mapped formula objects.
 * <p>
 * The formula placeholders found by the XPath expression are indexed by their {@code id}
 * attribute. For every entry of {@code formulaMap} the placeholder whose id equals
 * {@code FORMULA_ID_PREFIX + key} is stripped of its {@code class} attribute and content,
 * renamed to {@code div} and filled with the formula's HTML. Its parent is renamed to
 * {@code div} with class {@code formula}; text nodes directly inside the parent are merged
 * into a single {@code span} of class {@code text}.
 * <p>
 * Formulas without a matching placeholder, and placeholders without an {@code id}
 * attribute, are skipped instead of causing a {@link NullPointerException}.
 *
 * @param doc        JDOM Document where to replace the formulas
 * @param formulaMap Map of the indexed Formula Objects
 * @return JDOM Document with replaced formulas (the same instance that was passed in)
 */
public Document replaceFormulas(Document doc, Map<Integer, Formula> formulaMap) {
    List<Element> foundFormulas = xpath.evaluate(doc);
    if (foundFormulas.isEmpty()) {
        return doc;
    }

    // Index the placeholder elements by their id attribute
    Map<String, Element> formulaMarkupMap = new HashMap<>();
    for (Element element : foundFormulas) {
        String markupId = element.getAttributeValue("id");
        if (markupId != null) {
            formulaMarkupMap.put(markupId, element);
        }
    }

    // Replace all found formulas
    for (Map.Entry<Integer, Formula> entry : formulaMap.entrySet()) {
        Integer id = entry.getKey();
        Formula formula = entry.getValue();

        Element formulaMarkupRoot = formulaMarkupMap.get(FORMULA_ID_PREFIX + id);
        if (formulaMarkupRoot == null) {
            // No placeholder for this formula in the document - nothing to replace
            continue;
        }

        formulaMarkupRoot.removeAttribute("class");
        formulaMarkupRoot.removeContent();
        formulaMarkupRoot.setName("div");

        Element div = (Element) formulaMarkupRoot.getParent();
        div.setName("div");
        div.setAttribute("class", "formula");

        // Potentially there's text inside the paragraph...
        List<Text> texts = div.getContent(Filters.textOnly());
        if (!texts.isEmpty()) {
            StringBuilder textBuilder = new StringBuilder();
            for (Text text : texts) {
                textBuilder.append(text.getText());
            }
            Element textSpan = new Element("span");
            textSpan.setAttribute("class", "text");
            textSpan.setText(textBuilder.toString());
            div.addContent(textSpan);

            // The loose text nodes now live inside the span, drop the originals
            List<Content> content = div.getContent();
            content.removeAll(texts);
        }

        if (generateDebugMarkup) {
            div.setAttribute("style", "border: 1px solid black;");

            // Header
            Element h4 = new Element("h4");
            h4.setText("DEBUG - Formula #" + formula.getId());
            div.addContent(h4);

            // Render LaTeX source
            Element latexPre = new Element("pre");
            latexPre.setAttribute("class", "debug-latex");
            latexPre.setText(formula.getLatexCode());
            div.addContent(latexPre);

            // Render MathML markup
            Element mathmlPre = new Element("pre");
            mathmlPre.setAttribute("class", "debug-mathml");
            mathmlPre.setText(formula.getMathMl());
            div.addContent(mathmlPre);

            // Render HTML Markup
            Element htmlPre = new Element("pre");
            htmlPre.setAttribute("class", "debug-html");
            XMLOutputter xmlOutputter = new XMLOutputter();
            xmlOutputter.setFormat(Format.getRawFormat());
            htmlPre.setText(xmlOutputter.outputString(formula.getHtml()));
            div.addContent(htmlPre);
        }

        // Put the formula's HTML into the emptied placeholder
        formulaMarkupRoot.addContent(formula.getHtml());
    }
    return doc;
}

From source file:com.globalsight.dispatcher.bo.JobTask.java

License:Apache License

/**
 * Replaces the content of the given target element with the supplied target segment.
 * <p>
 * The segment is wrapped in a {@code <target>} element and parsed as XML; on success the
 * parsed nodes (moved into the namespace of {@code p_trgElement}) replace the element's
 * former content. If anything fails along the way, the element's text is set to the
 * segment re-decoded with {@code p_encoding} instead.
 *
 * @param p_trgElement the element whose content is replaced
 * @param p_target     the target segment; {@code null} or blank input leaves the element untouched
 * @param p_encoding   name of the charset used to decode the fallback text
 * @throws UnsupportedEncodingException if {@code p_encoding} is not a supported charset name
 */
private void setTargetSegment(Element p_trgElement, String p_target, String p_encoding)
        throws UnsupportedEncodingException {
    final boolean nothingToSet = (p_target == null) || p_target.trim().isEmpty();
    if (nothingToSet) {
        return;
    }

    // Computed up front, so an unsupported encoding is reported before anything is modified
    final String fallbackText = new String(p_target.getBytes("UTF-8"), p_encoding);
    try {
        final String wrappedSegment = "<target>" + p_target + "</target>";
        final Document parsed = new SAXBuilder().build(new StringReader(wrappedSegment));
        final Element parsedTarget = parsed.getRootElement().clone().detach();
        setNamespace(parsedTarget, p_trgElement.getNamespace());

        // Drop the original target segment, then copy the parsed nodes over in order
        p_trgElement.removeContent();
        final int nodeCount = parsedTarget.getContentSize();
        for (int idx = 0; idx < nodeCount; idx++) {
            p_trgElement.addContent(parsedTarget.getContent(idx).clone().detach());
        }
    } catch (Exception e) {
        // Segment could not be processed as markup: store it as plain text
        p_trgElement.setText(fallbackText);
    }
}

From source file:cz.muni.fi.mir.mathmlcanonicalization.modules.AbstractModule.java

License:Apache License

/**
 * Replaces an element by a newly created element with a different name.
 * <p>
 * The replacement takes over all child content and all attributes of the original element
 * and is inserted into the parent at the position the original element occupied.
 *
 * @param toReplace       the element to replace; must not be {@code null} and must have a parent element
 * @param replacementName the name of the new element; must not be {@code null} or empty
 */
protected void replaceElement(final Element toReplace, final String replacementName) {
    assert toReplace != null && replacementName != null;
    assert !replacementName.isEmpty();
    final Element parent = toReplace.getParentElement();
    assert parent != null;
    final Element replacement = new Element(replacementName);
    replacement.addContent(toReplace.removeContent());
    // getAttributes() is a live view and detach() removes the attribute from it, so the
    // loop must run over a snapshot; iterating the live list skips attributes or fails.
    final Attribute[] attributes = toReplace.getAttributes().toArray(new Attribute[0]);
    for (Attribute attribute : attributes) {
        replacement.setAttribute(attribute.detach());
    }
    final int parentIndex = parent.indexOf(toReplace);
    parent.removeContent(parentIndex);
    parent.addContent(parentIndex, replacement);
    LOGGER.log(Level.FINE, "{0} replaced with {1}", new Object[] { toReplace, replacementName });
}

From source file:cz.muni.fi.mir.mathmlcanonicalization.modules.MfencedReplacer.java

License:Apache License

/**
 * Replaces an mfenced element by a row consisting of an opening fence operator, the
 * given inner content and a closing fence operator.
 * <p>
 * The fences come from the element's open/close attributes, falling back to the configured
 * defaults; when the corresponding "force default" option is enabled the default is always
 * used. Depending on the inner/outer row options, the inner content and the resulting row
 * are inserted either as elements or unwrapped into their children.
 *
 * @param mfencedElement the element to replace; must not be {@code null}
 * @param insideContent  the content to place between the fences, may be {@code null}
 */
private void replaceMfenced(final Element mfencedElement, final Element insideContent) {
    assert mfencedElement != null; // but insideContent can be null
    final Namespace fenceNs = mfencedElement.getNamespace();
    final Element row = new Element(ROW, fenceNs);

    final String defaultOpen = getProperty(DEFAULT_OPEN);
    final String defaultClose = getProperty(DEFAULT_CLOSE);
    if (defaultOpen.isEmpty() || defaultClose.isEmpty()) {
        LOGGER.warning("Default open or close fence not set");
    }

    // An attribute on the element wins over the default unless the default is forced
    final String openFence = isEnabled(FORCE_DEFAULT_OPEN) ? defaultOpen
            : mfencedElement.getAttributeValue(OPEN_FENCE, defaultOpen);
    final String closeFence = isEnabled(FORCE_DEFAULT_CLOSE) ? defaultClose
            : mfencedElement.getAttributeValue(CLOSE_FENCE, defaultClose);

    row.addContent(new Element(OPERATOR, fenceNs).setText(openFence));
    if (insideContent != null) {
        if (!isEnabled(ADD_INNER_ROW)) {
            // no inner row wanted: move the children directly between the fences
            row.addContent(insideContent.removeContent());
        } else {
            row.addContent(insideContent);
        }
    }
    row.addContent(new Element(OPERATOR, fenceNs).setText(closeFence));

    final Element container = mfencedElement.getParentElement();
    final int position = container.indexOf(mfencedElement);
    container.removeContent(position);
    if (!isEnabled(ADD_OUTER_ROW)) {
        // no outer row wanted: splice the row's children into the parent
        container.addContent(position, row.removeContent());
    } else {
        container.addContent(position, row);
    }
    LOGGER.fine("Mfenced element converted");
}

From source file:de.danielluedecke.zettelkasten.database.Daten.java

License:Open Source License

/**
 * This method duplicates an entry and inserts it at the end or the next empty place in the
 * data file/*from  ww w  . ja v  a  2s  .  co m*/
 * 
 * @param nr the number of the entry that should be duplicated
 * @return 
 */
public boolean duplicateEntry(int nr) {
    // first of all, we duplicate all authors and keywords frequencies from the existing entry.
    // therefore, we first retrieve all author-index-numbers from that entry
    changeFrequencies(nr, 1);
    // retrieve entry that should be duplicated
    Element oldzettel = retrieveElement(zknFile, nr);
    // create new zettel
    Element zettel = new Element(ELEMENT_ZETTEL);
    // check whether we have any empty elements in between where we can insert the new entry
    int emptypos = retrieveFirstEmptyEntry();
    // if we have any empty elements...
    if (emptypos != -1 && settings.getInsertNewEntryAtEmpty()) {
        // retrieve empty element
        zettel = retrieveElement(zknFile, emptypos);
        // and remove former content, so we can add new content
        zettel.removeContent();
    }
    try {
        setZettelID(zettel);
        //
        // add title
        //
        // create child element with title information
        Element t = new Element(ELEMENT_TITLE);
        // and add it to the zettel-element
        zettel.addContent(t);
        // set value of the child element
        t.setText(oldzettel.getChild(ELEMENT_TITLE).getText());
        //
        // add content
        //
        // create child element with content information
        Element c = new Element(ELEMENT_CONTENT);
        // and add it to the zettel-element
        zettel.addContent(c);
        // set value of the content element
        c.setText(oldzettel.getChild(ELEMENT_CONTENT).getText());
        //
        // add author
        //
        // create child element with author information
        Element a = new Element(ELEMENT_AUTHOR);
        // and add it to the zettel-element
        zettel.addContent(a);
        // set value of author element
        a.setText(oldzettel.getChild(ELEMENT_AUTHOR).getText());
        //
        // add keywords
        //
        // create child element with keyword information
        Element k = new Element(ELEMENT_KEYWORD);
        // and add it to the zettel-element
        zettel.addContent(k);
        // store keyword index numbers
        k.setText(oldzettel.getChild(ELEMENT_KEYWORD).getText());
        //
        // now comes the manual links to other entries
        //
        Element m = new Element(ELEMENT_MANLINKS);
        zettel.addContent(m);
        m.setText("");
        //
        // add hyperlinks
        //
        // create child element with link information
        Element h = new Element(ELEMENT_ATTACHMENTS);
        // and add it to the zettel-element
        zettel.addContent(h);
        // add each hyperlink. therefor, iterate the array
        List<Element> links = oldzettel.getChild(ELEMENT_ATTACHMENTS).getChildren();
        Iterator<Element> i = links.iterator();
        while (i.hasNext()) {
            // create a new subchuld-element
            Element sublink = new Element(ELEMENT_ATTCHILD);
            Element le = i.next();
            // and add the link-string from the array
            sublink.setText(le.getText());
            h.addContent(sublink);
        }
        //
        // add remarks
        //
        // create child element with content information
        Element r = new Element(ELEMENT_REMARKS);
        // and add it to the zettel-element
        zettel.addContent(r);
        // set value of the content element
        r.setText(oldzettel.getChild(ELEMENT_REMARKS).getText());
        //
        // add timestamp
        //
        // set creation timestamp, but set no text for edit timestamp
        // since the entry is not edited
        setTimestamp(zettel, Tools.getTimeStamp(), "");
        //
        // now comes the luhmann number
        //
        Element l = new Element(ELEMENT_TRAILS);
        zettel.addContent(l);
        l.setText(oldzettel.getChild(ELEMENT_TRAILS).getText());
        //
        // complete datafile
        //
        // if we have any empty elements, go on here
        if (emptypos != -1 && settings.getInsertNewEntryAtEmpty()) {
            // return the empty-position, which is now filled with the new author-value
            zettelPos = emptypos;
        } else {
            // finally, add the whole element to the data file
            zknFile.getRootElement().addContent(zettel);
            // set the zettel-position to the new entry
            zettelPos = getCount(ZKNCOUNT);
        }
        // duplicate this entry into the correct entry order
        // by changing the prev/nex references (or pointers) of the entries.
        changeZettelPointer(zettelPos, nr);
        // titles have to be updated.
        setTitlelistUpToDate(false);
        // set modified state
        setModified(true);
    } catch (IllegalAddException ex) {
        Constants.zknlogger.log(Level.SEVERE, ex.getLocalizedMessage());
        return false;
    } catch (IllegalDataException ex) {
        Constants.zknlogger.log(Level.SEVERE, ex.getLocalizedMessage());
        return false;
    }
    return true;
}

From source file:de.danielluedecke.zettelkasten.database.Daten.java

License:Open Source License

/**
 * This method adds a new entry to the datafile. The needed parameters come from the JDialog
 * "CNewEntry.java". This dialog opens an edit-mask so the user can input the necessary information.
 * If everything is done, the JDialog retrieves all the information as string(-array)-variables
 * and simply passes these as paramaters to this method.
 * <br>/*  w  w w. j  av  a2  s  . com*/
 * <br>
 * What we have to do here is to check whether the keywords or links e.g. partly exist, and if so,
 * find out the related index number. Keywords which until now do not already exist in the keyword
 * file have to be added to the keyword file and the new index number has to be addes to the
 * keyword-element of the entry. and so on...
 *
 * @param title the entry's title as string
 * @param content the entry's content as string
 * @param authors the entry's author as string, retrieve index number and add it to the entry's author-element.
 * use {@code null} if no authors should be added
 * @param keywords the entry's keywords as string-array. retrieve index numbers and add those to the entry's keyword-element
 * use {@code null} if no authors should be added
 * @param remarks the remarks as string
 * @param links the entry's links as string
 * use {@code null} if no authors should be added
 * @param timestamp the current date. in this case, add it as creation date to the timestamp
 * @param luhmann the number of the currently display entry, before the user clicked "new" or "insert entry".
 * if we have to insert an entry, we need to know this number, because that entry retrieves this new entry's
 * index-number and adds it to its luhmann-tag (which indicates follower- and sub-entries).
 * use {@code -1} if no luhmann-number is needed (i.e. no follower-entry is added).
 * @param editDeletedEntry use {@code true} if user edits an deleted entry, which is the same as inserting a
 * new entry at the deleted entry's position. use {@code false} if a entry is added normally.
 * @param editDeletedEntryPosition the position of the currently displayed entry that is deleted and should be
 * overwritten with a new entry ({@code editDeletedEntry} is set to true).
 * @param insertAfterEntry indicates the position, after which existing entry the new added entry should be inserted.
 * Use {@code -1} to add the new entry to the end of the database.
 * @return one of the following constants:<br>
 * {@link #ADD_ENTRY_OK ADD_ENTRY_OK} if a normal entry was successfully added<br>
 * {@link #ADD_LUHMANNENTRY_OK ADD_LUHMANNENTRY_OK} if a follower-entry (trailing entry) was successfully added<br>
 * {@link #ADD_ENTRY_ERR ADD_ENTRY_ERR} if an error occured when adding a normal entry<br>
 * {@link #ADD_LUHMANNENTRY_ERR ADD_LUHMANNENTRY_ERR} if an error occured when adding a follower-entry (trailing entry)
 */
public int addEntry(String title, String content, String[] authors, String[] keywords, String remarks,
        String[] links, String timestamp, int luhmann, boolean editDeletedEntry, int editDeletedEntryPosition,
        int insertAfterEntry) {
    // init return value
    int retval = ADD_ENTRY_OK;
    List<Integer> manlinks;
    // check for valid content. if we have any content,
    // replace Unicode-chars with UBB-tags
    if (content != null && !content.isEmpty()) {
        content = Tools.replaceUnicodeToUbb(content);
    }
    // create a new zettel-element
    Element zettel = new Element(ELEMENT_ZETTEL);
    // check whether we have any empty elements in between where we can insert the new entry
    int emptypos = (editDeletedEntry) ? editDeletedEntryPosition : retrieveFirstEmptyEntry();
    // check whether user wants to edit an already deleted entry and insert a new one at
    // that position
    if (editDeletedEntry || (emptypos != -1 && settings.getInsertNewEntryAtEmpty())) {
        // retrieve empty element
        zettel = retrieveElement(zknFile, emptypos);
        // and remove former content, so we can add new content
        zettel.removeContent();
    }
    try {
        // add unique ID
        setZettelID(zettel);
        //
        // add title
        //
        // create child element with title information
        Element t = new Element(ELEMENT_TITLE);
        // and add it to the zettel-element
        zettel.addContent(t);
        // set value of the child element
        t.setText(title);
        //
        // add content
        //
        // create child element with content information
        Element c = new Element(ELEMENT_CONTENT);
        // and add it to the zettel-element
        zettel.addContent(c);
        // set value of the content element
        c.setText(content);
        // then, create form-images
        createFormImagesFromContent(content);
        //
        // add author
        //
        // create child element with author information
        Element a = new Element(ELEMENT_AUTHOR);
        // and add it to the zettel-element
        zettel.addContent(a);
        // create empty string buffer which stores the index numbers
        // of the converted authors
        StringBuilder newau = new StringBuilder("");
        // check whether we have authors at all
        if ((authors != null) && (authors.length > 0)) {
            // iterate the array and get the index number of each author string
            // if a author does not already exist, add it to the authorfile
            for (String aut : authors) {
                // trim leading and trailing spaces
                aut = aut.trim();
                // only proceed for this entry, if it contains a value
                if (!aut.isEmpty()) {
                    // add author
                    int authorPos = addAuthor(aut, 1);
                    // append the index number in the string buffer
                    newau.append(String.valueOf(authorPos));
                    // separator for the the index numbers, since more authors
                    // and thus more index numbers might be stored in the author element
                    newau.append(",");
                }
            }
            // check whether we have any author-value at all...
            if (newau.length() > 0) {
                // shorten the stringbuffer by one char, since we have a
                // superfluous comma char (see for-loop above)
                newau.setLength(newau.length() - 1);
                // and say that author list is out of date
                setAuthorlistUpToDate(false);
            }
        }
        a.setText(newau.toString());
        //
        // add keywords
        //
        // create child element with keyword information
        Element k = new Element(ELEMENT_KEYWORD);
        // and add it to the zettel-element
        zettel.addContent(k);
        // create empty string buffer which stores the index numbers
        // of the converted keywords
        StringBuilder newkw = new StringBuilder("");
        // check whether we have keywords at all
        if ((keywords != null) && (keywords.length > 0)) {
            // iterate the array and get the index number of each keyword string
            // if a keyword does not already exist, add it to the keywordfile
            for (String keyw : keywords) {
                // trim leading and trailing spaces
                keyw = keyw.trim();
                // only proceed for this entry, if it contains a value
                if (!keyw.isEmpty()) {
                    // add it to the data file
                    // and store the position of the new added keyword in the
                    // variable keywordPos
                    int keywordPos = addKeyword(keyw, 1);
                    // append the index number in the string buffer
                    newkw.append(String.valueOf(keywordPos));
                    // separator for the the index numbers, since more keywords
                    // and thus more index numbers might be stored in the keyword element
                    newkw.append(",");
                }
            }
            // check whether we have any keyword-values at all...
            if (newkw.length() > 0) {
                // shorten the stringbuffer by one char, since we have a
                // superfluous comma char (see for-loop above)
                newkw.setLength(newkw.length() - 1);
                // and say that author list is out of date
                setKeywordlistUpToDate(false);
            }
        }
        // store keyword index numbers
        k.setText(newkw.toString());
        //
        // now comes the manual links to other entries
        //
        Element m = new Element(ELEMENT_MANLINKS);
        zettel.addContent(m);
        // check for manual links in content
        // and add them
        manlinks = extractManualLinksFromContent(content);
        m.setText(retrievePreparedManualLinksFromContent(manlinks));
        //
        // add hyperlinks
        //
        // create child element with link information
        Element h = new Element(ELEMENT_ATTACHMENTS);
        // and add it to the zettel-element
        zettel.addContent(h);
        // add each hyperlink string
        if (links != null && links.length > 0) {
            // therefor, iterate the array
            for (String l : links) {
                // create a new subchuld-element
                Element sublink = new Element(ELEMENT_ATTCHILD);
                // and add the link-string from the array
                sublink.setText(l);
                h.addContent(sublink);
            }
        }
        //
        // add remarks
        //
        // create child element with content information
        Element r = new Element(ELEMENT_REMARKS);
        // and add it to the zettel-element
        zettel.addContent(r);
        // set value of the content element
        r.setText(remarks);
        //
        // add remarks
        //
        // set creation timestamp, but set no text for edit timestamp
        // since the entry is not edited
        setTimestamp(zettel, Tools.getTimeStamp(), "");
        //
        // now comes the luhmann number
        //
        Element l = new Element(ELEMENT_TRAILS);
        zettel.addContent(l);
        l.setText("");
        //
        // complete datafile
        //
        // if we have any empty elements, go on here
        if (emptypos != -1 && settings.getInsertNewEntryAtEmpty()) {
            // return the empty-position, which is now filled with the new author-value
            zettelPos = emptypos;
        } else {
            // finally, add the whole element to the data file
            zknFile.getRootElement().addContent(zettel);
            // set the zettel-position to the new entry
            zettelPos = getCount(ZKNCOUNT);
        }
        // and add the new position to the history...
        addToHistory();
        // set modified state
        setModified(true);
    } catch (IllegalAddException ex) {
        Constants.zknlogger.log(Level.SEVERE, ex.getLocalizedMessage());
        return ADD_ENTRY_ERR;
    } catch (IllegalDataException ex) {
        Constants.zknlogger.log(Level.SEVERE, ex.getLocalizedMessage());
        return ADD_ENTRY_ERR;
    }
    // if we have a follower-number (insert-entry), we have to change the luhmann-tag
    // of the related entry (which number is passed in the luhmann-variable)
    if (luhmann != -1) {
        // try to add luhmann number
        if (addLuhmannNumber(luhmann, zettelPos)) {
            // if it was successfull, we can insert this entry
            // after the "parent" entry
            retval = ADD_LUHMANNENTRY_OK;
            // to do this, we need to change the "insertAfter" value
            insertAfterEntry = luhmann;
        } else {
            retval = ADD_LUHMANNENTRY_ERR;
        }
    }
    // check whether inserted entry position is already the last position in
    // the entry order
    // in this case, we can set the variable to -1, so it will automatically be
    // added to the end
    changeZettelPointer(zettelPos, insertAfterEntry);
    // set this entry as first entry if we do not have any
    // first entry yet...
    if (-1 == getFirstZettel())
        setFirstZettel(zettelPos);
    // save ID of last added entry
    setLastAddedZettelID(zettel);
    // create back-references for manual links
    // we can do this here first, because we need
    // "zettelPos" as reference, which is not available earlier
    addManualLink(manlinks, zettelPos);
    // entry successfully added
    return retval;
}

From source file:de.danielluedecke.zettelkasten.database.Synonyms.java

License:Open Source License

/**
 * This method sets a new synonym-line, i.e. a synonym (as index-word) with its related synonyms.
 * The new synonyms have to be passed as string-array parameter {@code synline}.
 * <p>
 * The former synonyms of the entry are removed, the first array element is stored as
 * {@code indexword} attribute and each further element as {@code syn} child element.
 * If {@code synline} is {@code null} or empty, or if no element could be retrieved for
 * {@code nr}, nothing is changed.
 *
 * @param nr the number of the requested synonym, with a range from 0 to (getCount()-1)
 * @param synline a string-array with the first element being the index-word, and the following elements
 * being the related synonyms
 */
public void setSynonymLine(int nr, String[] synline) {
    // without an index-word there is nothing to store. check this before the
    // existing synonyms are removed, so invalid input does not destroy data
    if (synline == null || synline.length == 0) {
        return;
    }
    // get element
    Element synonym = retrieveElement(nr);
    // NOTE(review): presumably retrieveElement returns null for an invalid number - confirm
    if (synonym == null) {
        return;
    }
    // remove all child-content (i.e. all synonyms)
    synonym.removeContent();
    // the data changed from here on, even if the new line consists of the index-word
    // only or if one of the new values is rejected below
    setModified(true);
    try {
        // set the original word as value-attribute to the "entry"-element
        synonym.setAttribute("indexword", synline[0]);
        // now go through the rest of the string-array
        for (int cnt = 1; cnt < synline.length; cnt++) {
            // create a sub-child "syn" for each further synonym
            Element syn = new Element("syn");
            // set text from string array
            syn.setText(synline[cnt]);
            // add child to synonym-element
            synonym.addContent(syn);
        }
    } catch (IllegalDataException ex) {
        Constants.zknlogger.log(Level.SEVERE, ex.getLocalizedMessage());
    } catch (IllegalNameException ex) {
        Constants.zknlogger.log(Level.SEVERE, ex.getLocalizedMessage());
    }
}

From source file:de.relaunch64.popelganda.database.Settings.java

License:Open Source License

/**
 * Stores the files of all editor panes in the settings, so they can be reopened later.
 * <p>
 * The settings element for reopened files is created if it is missing and emptied
 * otherwise. For every editor pane whose file path is set and exists on disk, one child
 * element is added that holds the absolute path as text and the assembler ID and script
 * value as attributes.
 *
 * @param ep the editor panes whose files are stored
 */
public void setReopenFiles(EditorPanes ep) {
    Element reopenFiles = root.getChild(SETTING_REOPEN_FILES);
    if (reopenFiles == null) {
        reopenFiles = new Element(SETTING_REOPEN_FILES);
        root.addContent(reopenFiles);
    }
    // start from scratch, entries of a former session are discarded
    reopenFiles.removeContent();
    for (int pane = 0; pane < ep.getCount(); pane++) {
        // file path, assembler and script of the current pane
        final File path = ep.getFilePath(pane);
        final int assemblerId = ep.getAssembler(pane).getID();
        final int scriptValue = ep.getScript(pane);
        // panes without an existing file are not stored
        if (path == null || !path.exists()) {
            continue;
        }
        final Element fileEntry = new Element(SETTING_REOPEN_FILES_CHILD);
        fileEntry.setText(path.getAbsolutePath());
        fileEntry.setAttribute(ATTR_ASM, String.valueOf(assemblerId));
        fileEntry.setAttribute(ATTR_SCRIPT, String.valueOf(scriptValue));
        reopenFiles.addContent(fileEntry);
    }
}

From source file:eu.himeros.hocr.FlatXml.java

License:Open Source License

/**
 * Reads an hOCR file, flattens its page structure and writes the result to a new file.
 * <p>
 * The document gets a new {@code html} root element in the XHTML namespace. A
 * {@code title} element with the text "ocr" is appended to the head. The body content is
 * replaced by a single {@code div} of class {@code ocr_page} that contains clones of all
 * line elements found below the {@code ocr_carea} elements; the former area and paragraph
 * boundaries are kept as comments only.
 * <p>
 * A missing {@code head} or {@code body} element is tolerated: the respective part is
 * skipped. The output is encoded as declared by the output format, and the output file is
 * always closed.
 *
 * @param inFile  the hOCR file to read
 * @param outFile the file the flattened document is written to
 * @throws Exception if the input cannot be parsed or the output cannot be written
 */
private void init(File inFile, File outFile) throws Exception {
    SAXBuilder builder = new SAXBuilder();
    Document doc = builder.build(inFile);
    Element root = doc.getRootElement();
    Namespace oldns = root.getNamespace();
    Element newRoot = new Element("html", "http://www.w3.org/1999/xhtml");
    Namespace xmlns = newRoot.getNamespace();

    Element head = root.getChild("head", oldns);
    if (head != null) {
        head.setNamespace(xmlns);
        for (Element child : head.getChildren()) {
            child.setNamespace(xmlns);
        }
        Element title = new Element("title", xmlns);
        title.addContent("ocr");
        head.addContent(title);
    }

    Element body = root.getChild("body", oldns);
    if (body != null) {
        body.setNamespace(xmlns);
        Element page = new Element("div", xmlns);
        page.setAttribute("class", "ocr_page");
        page.setAttribute("id", "i" + inFile.getName().substring(1).replace(".html", ".png"));
        XPathExpression<Element> xpath = XPathFactory.instance().compile("//*[@class='ocr_carea']",
                Filters.element(), null, Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
        List<Element> careaElL = xpath.evaluate(body);
        for (Element careaEl : careaElL) {
            // area and paragraph boundaries survive as comments only
            page.addContent(new Comment("<div class=\"" + careaEl.getAttributeValue("class") + "\" title=\""
                    + careaEl.getAttributeValue("title") + "\">"));
            for (Element pEl : careaEl.getChildren()) {
                page.addContent(new Comment("<p>"));
                for (Element lineEl : pEl.getChildren()) {
                    lineEl.removeAttribute("id");
                    lineEl.setNamespace(xmlns);
                    for (Element child : lineEl.getChildren()) {
                        child.removeAttribute("id");
                        child.removeAttribute("lang");
                        child.removeAttribute("lang", xmlns);
                        child.setNamespace(xmlns);
                    }
                    // the line itself stays in the old body, which is emptied below
                    page.addContent(lineEl.clone());
                }
                page.addContent(new Comment("</p>"));
            }
            page.addContent(new Comment("</div>"));
        }
        body.removeContent();
        body.addContent(page);
    }

    newRoot.addContent(root.removeContent());
    doc.detachRootElement();
    doc.setRootElement(newRoot);
    XMLOutputter xmlOutputter = new XMLOutputter(Format.getPrettyFormat());
    // Write to a stream rather than a FileWriter, so the bytes are encoded with the
    // encoding the output format declares instead of the platform default charset.
    java.io.OutputStream out = new java.io.BufferedOutputStream(new java.io.FileOutputStream(outFile));
    try {
        xmlOutputter.output(doc, out);
    } finally {
        out.close();
    }
}

From source file:eu.himeros.hocr.HocrInfoAggregator.java

License:Open Source License

/**
 * Converts the word spans of the document into hOCR "alternatives" markup.
 * <p>
 * Every XHTML {@code span} that has an {@code id} or {@code idx} attribute is processed;
 * for a span with an {@code idx} attribute its first child element (if any) is processed
 * instead. The parent of the processed span gets a running {@code id} ({@code w_0},
 * {@code w_1}, ...) as first attribute. The span's text is moved into an {@code ins}
 * element of class {@code alt}, the span becomes class {@code alternatives} and its
 * working attributes are removed. Each non-empty, space separated suggestion of the
 * span's former {@code title} attribute is appended as {@code del} element, with a score
 * that starts at 0.90 and decreases by 0.01 per suggestion.
 */
private void makeCompliantHocr() {
    xpath = XPathFactory.instance().compile("//ns:span[@id|@idx]", Filters.element(), null,
            Namespace.getNamespace("ns", "http://www.w3.org/1999/xhtml"));
    List<Element> elements = xpath.evaluate(root);
    int spanId = 0;
    for (Element found : elements) {
        Element span = found;
        if (found.getAttribute("idx") != null) {
            // an idx-span wraps the actual word span; keep the wrapper if it has no children
            List<Element> children = found.getChildren();
            if (!children.isEmpty()) {
                span = children.get(0);
            }
        }
        Element parent = span.getParentElement();
        // re-set the attributes, so the new id becomes the first attribute of the parent
        LinkedList<Attribute> attributeLl = new LinkedList<Attribute>(parent.getAttributes());
        attributeLl.addFirst(new Attribute("id", "w_" + spanId++));
        parent.setAttributes(attributeLl);

        // the title holds the suggestions; read it before the attribute is removed below
        String title = span.getAttributeValue("title");

        Element ins = new Element("ins", xmlns);
        ins.setAttribute("class", "alt");
        ins.setAttribute("title", makeNlp(span.getAttributeValue("class")));
        ins.setText(span.getText());
        span.removeContent();
        span.addContent(ins);
        span.setAttribute("class", "alternatives");
        span.removeAttribute("uc");
        span.removeAttribute("occ");
        span.removeAttribute("title");
        span.removeAttribute("anchor");
        span.removeAttribute("anchor-id");
        span.removeAttribute("id");
        parent.removeAttribute("idx");
        span.removeAttribute("whole");
        parent.removeAttribute("whole");
        if (title == null || "".equals(title)) {
            continue;
        }
        double score = 0.90;
        for (String rawSuggestion : title.split(" ")) {
            if (rawSuggestion == null || "".equals(rawSuggestion)) {
                continue;
            }
            Element del = new Element("del", xmlns);
            // the decimal separator depends on the default locale, the score always uses a dot
            del.setAttribute("title", "nlp " + String.format("%.2f", score).replace(',', '.'));
            score = score - 0.01;
            String suggestion = rawSuggestion.replaceAll(l1PunctMarkFilter, "");
            // take over the punctuation marks that surround the recognized text
            Matcher leftMatcher = l1LeftPunctMarkPattern.matcher(ins.getText());
            if (leftMatcher.matches()) {
                suggestion = leftMatcher.group(1) + suggestion;
            }
            Matcher rightMatcher = l1RightPunctMarkPattern.matcher(ins.getText());
            if (rightMatcher.matches()) {
                // the marker symbol has to stay the last char, behind the punctuation
                String ngtSymbol = "";
                if (suggestion.endsWith("\u261a")) {
                    ngtSymbol = "\u261a";
                    suggestion = suggestion.substring(0, suggestion.length() - 1);
                }
                suggestion = suggestion + rightMatcher.group(1) + ngtSymbol;
            }
            // a marked suggestion that is similar enough to the recognized text replaces it,
            // but only if the span's parent has an xml:lang attribute
            if (suggestion.endsWith("\u261a")
                    && parent.getAttributeValue("lang", Namespace.XML_NAMESPACE) != null) {
                String buff = suggestion.substring(0, suggestion.length() - 1);
                sa.align(buff, ins.getText());
                double sim = 1 - sa.getEditDistance()
                        / Math.max((double) buff.length(), (double) ins.getText().length());
                if (sim > 0.6) {
                    suggestion = ins.getText() + "\u261b";
                    ins.setText(buff);
                    ins.setAttribute("title", "nlp 0.70");
                }
            }
            del.addContent(suggestion);
            span.addContent(del);
        }
    }
}