Example usage for org.dom4j Element addElement

Introduction

On this page you can find example usages of org.dom4j Element addElement.

Prototype

Element addElement(String name);

Source Link

Document

Adds a new Element node with the given name to this branch and returns a reference to the new node.

Usage

From source file:com.fivepebbles.ProcessFiles.java

License:MIT License

/**
 * Writes the bucket/file table to the XML file {@code target/s3files.xml}.
 *
 * <p>Each row of {@code valuesin} produces one {@code <bucket>} element under the
 * {@code <s3backup>} root. Index 0 of a row is the bucket name (stored in the
 * {@code name} attribute); every later non-null entry of the row becomes a
 * {@code <file>} child of that bucket. An empty row produces no element.
 *
 * <p>I/O failures are not propagated; the stack trace is printed instead.
 *
 * @param valuesin rows of the form {bucket name, file or folder, file or folder, ...}
 */
@Override
public void setFiles(String[][] valuesin) {

    // Build the XML document holding AWS bucket names and their files/folders.
    Document mydoc = DocumentHelper.createDocument();
    Element root = mydoc.addElement("s3backup");

    for (String[] row : valuesin) {
        // Scoped per row so a file entry can never attach to a previous row's bucket.
        Element bucket = null;
        for (int col = 0; col < row.length; col++) {
            if (col == 0) {
                // Bucket names are in index 0.
                bucket = root.addElement("bucket").addAttribute("name", row[col]);
            } else if (bucket != null && row[col] != null) {
                bucket.addElement("file").addText(row[col]);
            }
        }
    }

    // Save to the file system. try-with-resources closes the writer even when
    // write() or flush() throws, which the previous explicit close() did not.
    try (FileWriter writer = new FileWriter(new File("target", "s3files.xml"))) {
        mydoc.write(writer);
        writer.flush();
    } catch (IOException e) {
        //***TODO*** log message
        e.printStackTrace();
    }
}

From source file:com.flaptor.hounder.crawler.modules.IndexerModule.java

License:Apache License

/**
 * Builds a {@code documentDelete} request carrying the page's document id and
 * submits it to the indexer selected by {@code pageMapper}.
 *
 * <p>While the indexer answers {@code RETRY_QUEUE_FULL} the request is resubmitted
 * after sleeping {@code indexerBusyRetryTime * 1000}. Once accepted, the page is
 * flagged as not emitted. Any exception raised while submitting is logged and
 * not rethrown.
 *
 * @param page the page whose document should be removed from the index
 */
private void deleteFromIndex(Page page) {
    org.dom4j.Document deleteRequest = DocumentHelper.createDocument();
    deleteRequest.addElement("documentDelete").addElement("documentId").addText(getDocumentId(page));

    try {
        int indexerSlot = pageMapper.mapPage(page);
        for (;;) {
            if (indexers[indexerSlot].index(deleteRequest) != IndexerReturnCode.RETRY_QUEUE_FULL) {
                break;
            }
            // Queue is full: back off before retrying.
            try {
                Thread.sleep(indexerBusyRetryTime * 1000);
            } catch (InterruptedException e) {
                logger.debug("Sleep interrupted: " + e, e);
            }
        }
        page.setEmitted(false);
    } catch (Exception e) {
        logger.error(e, e);
    }
}

From source file:com.flaptor.hounder.crawler.modules.IndexerModule.java

License:Apache License

/**
 * Builds a {@code documentAdd} request for the fetched document and submits it
 * to the indexer selected by {@code pageMapper}.
 *
 * <p>Documents whose content is empty are skipped with a warning. Otherwise the
 * request carries, in this order: the overall boost, the document id, the fixed
 * search fields (url, site, tokenizedHost, title, text, anchor, crawl), the raw
 * body when {@code sendContent} is set, any additional fields, one field per
 * indexable attribute, and the space-separated category list.
 *
 * <p>The overall boost is the product of the five damped factors and the
 * {@code "boost"} attribute; it is clamped to a minimum of {@code 1e-6}.
 *
 * <p>While the indexer answers {@code RETRY_QUEUE_FULL} the request is resubmitted
 * after sleeping {@code indexerBusyRetryTime * 1000}. Once accepted, the page is
 * flagged as emitted. Any exception raised while submitting is logged and not rethrown.
 *
 * @param doc the fetched document to index
 */
protected void addToIndex(FetchDocument doc) {

    byte[] content = doc.getContent();
    if (0 == content.length) {
        logger.warn("Page has no data. Ignoring this document.");
        return;
    }

    Set<String> categories = doc.getCategories();
    Map<String, Object> attributes = doc.getAttributes();
    Map<String, Object> indexableAttributes = doc.getIndexableAttributes();

    // build xml doc
    org.dom4j.Document dom = DocumentHelper.createDocument();
    Element root = dom.addElement("documentAdd");
    Page page = doc.getPage();
    String text = doc.getText();
    String url = page.getUrl();
    String host = getHost(url);
    String title = doc.getTitle(titleLengthLimit);
    String tokenizedHost = tokenizeHost(host);
    String anchorText = getAnchorText(page);

    float categoryBoost = calculateCategoryBoost(attributes);
    float pagerankBoost = calculatePagerankBoost(page);
    float spamrankBoost = calculateSpamrankBoost(page);
    float logBoost = calculateLogBoost(page);
    float freshnessBoost = calculateFreshnessBoost(page);

    // add overall score
    float f1 = factor("category", categoryBoost, categoryBoostDamp);
    float f2 = factor("pagerank", pagerankBoost, pagerankBoostDamp);
    float f3 = factor("spamrank", spamrankBoost, spamrankBoostDamp);
    float f4 = factor("log", logBoost, logBoostDamp);
    float f5 = factor("freshness", freshnessBoost, freshnessBoostDamp);
    float f6 = ((Double) attributes.get("boost")).floatValue(); // as calculated by the boost module, or 1.0 if no boost module is defined.
    float boost = f1 * f2 * f3 * f4 * f5 * f6;

    if (boost < 1e-6f) {
        logger.warn("Boost too low! (" + boost + ")  category=" + f1 + " (" + categoryBoost + ":"
                + categoryBoostDamp + ")  pagerank=" + f2 + " (" + pagerankBoost + ":" + pagerankBoostDamp
                + ")  spamrank=" + f3 + " (" + spamrankBoost + ":" + spamrankBoostDamp + ")  log=" + f4 + " ("
                + logBoost + ":" + logBoostDamp + ")  freshness=" + f5 + " (" + freshnessBoost + ":"
                + freshnessBoostDamp + ") moduleBoost=" + f6);
        boost = 1e-6f;
    }

    if (null == title || "".equals(title)) {
        title = "Untitled";
    }

    root.addElement("boost").addText(String.valueOf(boost));
    root.addElement("documentId").addText(getDocumentId(page));

    // The attribute map is untyped; this cast is the only unchecked operation here,
    // so the warning is suppressed on this declaration rather than the whole method.
    @SuppressWarnings("unchecked")
    Map<String, Double> boostMap = (Map<String, Double>) attributes.get("field_boost");

    // add the search fields
    addField(root, "url", url, true, true, true, boostMap);
    addField(root, "site", host, true, true, false, boostMap);
    addField(root, "tokenizedHost", tokenizedHost, false, true, true, boostMap);
    addField(root, "title", title, true, true, true, boostMap);
    addField(root, "text", text, true, true, true, boostMap);
    addField(root, "anchor", anchorText, false, true, true, boostMap);
    addField(root, "crawl", crawlName, false, true, true, boostMap);

    if (sendContent) {
        addBody(root, doc, content);
    }

    addAdditionalFields(root, page, boostMap);

    // Adding metainfo from attributes; a null attribute value is indexed as an empty string.
    for (Entry<String, Object> attribute : indexableAttributes.entrySet()) {
        Object attributeValue = attribute.getValue();
        addField(root, attribute.getKey(), attributeValue == null ? "" : attributeValue.toString(),
                true, true, true, boostMap);
    }

    if (null != categories) {
        // Join the categories the page belongs to into one space-separated field.
        // StringBuilder: the buffer never leaves this method, so no synchronization is needed.
        StringBuilder assignedCategories = new StringBuilder();
        for (String category : categories) {
            assignedCategories.append(category);
            assignedCategories.append(" ");
        }
        addField(root, "categories", assignedCategories.toString().trim(), true, true, true, boostMap);
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Indexing dom: " + DomUtil.domToString(dom));
    }
    // Send the document to the indexer. If the queue is full, wait and retry.
    try {
        int i = pageMapper.mapPage(page);
        while (indexers[i].index(dom) == IndexerReturnCode.RETRY_QUEUE_FULL) {
            try {
                Thread.sleep(indexerBusyRetryTime * 1000);
            } catch (InterruptedException e) {
                // NOTE(review): the interrupt is only logged and the retry loop continues.
                logger.debug("Sleep interrupted: " + e, e);
            }
        }
        page.setEmitted(true);
    } catch (Exception e) {
        logger.error(e, e);
    }
}

From source file:com.flaptor.hounder.crawler.modules.IndexerModule.java

License:Apache License

/**
 * Adds a new {@code field} child to the <code>doc</code> Element.
 *
 * <p>The field's boost is looked up by name in {@code boostMap}; when the map is
 * null, has no entry for the name, or maps the name to null, the boost is 1.0.
 *
 * @param doc the element to add the field to
 * @param name the name of the field
 * @param value the String value for the field
 * @param stored true iff the field should be stored
 * @param indexed true iff the field should be indexed
 * @param tokenized true iff the field should be tokenized
 * @param boostMap map containing the boosts for each field name; may be null
 */
protected final void addField(Element doc, String name, String value, boolean stored, boolean indexed,
        boolean tokenized, Map<String, Double> boostMap) {
    Double boost = 1.0d;
    if (null != boostMap) {
        // A single get() with a null check also covers a key mapped to null, which
        // previously passed containsKey() and then failed on boost.toString().
        Double configuredBoost = boostMap.get(name);
        if (null != configuredBoost) {
            boost = configuredBoost;
        }
    }
    doc.addElement("field").addAttribute("name", name).addAttribute("stored", Boolean.toString(stored))
            .addAttribute("indexed", Boolean.toString(indexed))
            .addAttribute("tokenized", Boolean.toString(tokenized)).addAttribute("boost", boost.toString())
            .addText(value);
}

From source file:com.flaptor.hounder.crawler.modules.IndexerModule.java

License:Apache License

/**
 * Adds a {@code body} child to <code>doc</code> holding <code>bytes</code> decoded as text.
 *
 * <p>The charset is taken from the {@code charset} parameter of the document's
 * {@code content-type} header when present; otherwise the platform default charset
 * is used. If the resolved charset is not supported, an error is logged and no
 * body element is added.
 *
 * @param doc the element to add the body to
 * @param fetchDoc the fetched document whose headers are inspected
 * @param bytes the raw content to decode
 */
protected final void addBody(Element doc, FetchDocument fetchDoc, byte[] bytes) {
    String encoding = null;
    // find charset. http headers usually have a Content-Type line, but
    // as it may not be in the same case, all headers are stored lowercased.
    // Content-Type lines contain mime-type and charset, separated by ;
    // for example
    // Content-Type: text/html; charset=UTF-8
    if (fetchDoc.getHeader().containsKey("content-type")) {
        String contentType = fetchDoc.getHeader().get("content-type");
        if (null != contentType) {
            for (String token : contentType.split(";")) {
                int equalsAt = token.indexOf('=');
                // Locale.ROOT keeps case mapping stable (e.g. under a Turkish default
                // locale "CHARSET" would otherwise not lower-case to "charset").
                if (equalsAt < 0 || !token.toLowerCase(java.util.Locale.ROOT).contains("charset")) {
                    continue;
                }
                String candidate = token.substring(equalsAt + 1).trim();
                // The value may be quoted, as in charset="UTF-8".
                if (candidate.length() >= 2 && candidate.startsWith("\"") && candidate.endsWith("\"")) {
                    candidate = candidate.substring(1, candidate.length() - 1).trim();
                }
                // An empty value ("charset=") is ignored instead of failing on a missing token.
                if (candidate.length() > 0) {
                    encoding = candidate.toUpperCase(java.util.Locale.ROOT);
                    break;
                }
            }
        }
    }
    // if not found, use default encoding
    if (null == encoding) {
        encoding = java.nio.charset.Charset.defaultCharset().name();
    }

    try {
        doc.addElement("body").addText(new String(bytes, encoding));
    } catch (java.io.UnsupportedEncodingException e) {
        logger.error("while adding body: ", e);
    }
}

From source file:com.flaptor.hounder.indexer.RmiIndexerStub.java

License:Apache License

/**
 * Builds a {@code documentDelete} request whose {@code documentId} element
 * contains the given url.
 *
 * @param url the text placed in the {@code documentId} element
 * @return the newly created document
 */
private static Document generateDeleteDocument(String url) {
    org.dom4j.Document deleteRequest = DocumentHelper.createDocument();
    deleteRequest.addElement("documentDelete").addElement("documentId").addText(url);
    return deleteRequest;
}

From source file:com.flaptor.hounder.indexer.SanitizerModule.java

License:Apache License

/**
 * Command-line driver: wraps the first argument in a {@code documentAdd}
 * document (as a {@code text} element and as a {@code field} named "text"),
 * runs it through a {@code SanitizerModule} and prints each resulting document.
 *
 * @param args {@code args[0]} is the text to sanitize
 */
public static void main(String[] args) {
    // Report a missing argument instead of failing with ArrayIndexOutOfBoundsException.
    if (args.length < 1) {
        System.err.println("Usage: SanitizerModule <text>");
        return;
    }
    String text = args[0];
    Document doc = DocumentHelper.createDocument();
    Element root = doc.addElement("documentAdd");
    root.addElement("text").addText(text);
    root.addElement("field").addAttribute("name", "text").addAttribute("indexed", "true")
            .addAttribute("stored", "true").addAttribute("tokenized", "true").addText(text);
    SanitizerModule mod = new SanitizerModule();
    Document[] docs = mod.internalProcess(doc);
    for (Document d : docs) {
        System.out.println(DomUtil.domToString(d));
    }
}

From source file:com.flaptor.hounder.searcher.OpenSearch.java

License:Apache License

/**
 * Creates an OpenSearch-compatible DOM document.
 * The generated dom contains only valid xml characters (infringing chars are removed).
 * Compliant with OpenSearch 1.0 with most of the Nutch 0.8.1 extensions.
 *
 * <p>Without XSLT the document is {@code <rss version="2.0"><channel>...}; with XSLT
 * an {@code xml-stylesheet} processing instruction pointing at {@code xsltPath} is
 * added and the single root element is {@code <searchResults>}.
 *
 * @param baseUrl the url of the webapp
 * @param htmlSearcher the name of the component (servlet/jsp) that returns the search results in an HTML page
 * @param opensearchSearcher the name of the component (servlet/jsp) that returns the search results in an OpenSearch RSS page
 * @param extraParams the parameters present in the request, not passed explicitly (such as sort, reverse, etc.);
 *          appended verbatim to the generated links
 * @param queryString the query string, as entered by the user
 * @param start the offset of the first result
 * @param count the number of results requested (the actual number of results found may be smaller)
 * @param sr the SearchResults structure containing the result of performing the query
 * @param status the status code, written to the {@code hounder:status} element
 * @param statusMessage the status description, written to the {@code hounder:statusDesc} element
 * @param useXslt whether to emit the stylesheet instruction and the {@code searchResults} root
 *          instead of the RSS envelope
 * @return a DOM document
 * <br>An empty sr argument means that no results were found.
 */
public static final Document buildDom_1_0(String baseUrl, String htmlSearcher, String opensearchSearcher,
        String extraParams, String queryString, int start, int count, GroupedSearchResults sr, int status,
        String statusMessage, boolean useXslt) {

    String encodedQuery;
    try {
        encodedQuery = URLEncoder.encode(queryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // Should never happen!
        encodedQuery = "";
    }
    Document dom = DocumentHelper.createDocument();
    if (useXslt) {
        Map<String, String> map = new HashMap<String, String>();
        map.put("type", "text/xsl");
        map.put("href", xsltPath);
        dom.addProcessingInstruction("xml-stylesheet", map);
    }

    Namespace opensearchNs = DocumentHelper.createNamespace("opensearch", XMLNS_A9_OPENSEARCH_1_0);
    Namespace hounderNs = DocumentHelper.createNamespace("hounder", XMLNS_HOUNDER_OPENSEARCH_1_0);
    Element root;
    Element channel;
    if (!useXslt) {
        root = dom.addElement("rss").addAttribute("version", "2.0");
        channel = root.addElement("channel");
    } else {
        // With XSLT the channel content hangs directly off the root element.
        channel = dom.addElement("searchResults");
        root = channel;
    }
    root.add(opensearchNs);
    root.add(hounderNs);

    channel.addElement("title").addText(titlePrefix + " " + DomUtil.filterXml(queryString));
    channel.addElement("link")
            .addText(baseUrl + "/" + htmlSearcher + "?query=" + encodedQuery + "&start=" + start + extraParams);
    channel.addElement("description").addText(descPrefix + " " + DomUtil.filterXml(queryString));
    channel.addElement(QName.get("totalResults", opensearchNs))
            .addText(Integer.toString(sr.totalGroupsEstimation()));
    channel.addElement(QName.get("startIndex", opensearchNs)).addText(Integer.toString(start));
    channel.addElement(QName.get("itemsPerPage", opensearchNs)).addText(Integer.toString(count));
    channel.addElement(QName.get("query", hounderNs)).addText(DomUtil.filterXml(queryString));
    AQuery suggestedQuery = sr.getSuggestedQuery();
    if (null != suggestedQuery) {
        channel.addElement(QName.get("suggestedQuery", hounderNs))
                .addText(DomUtil.filterXml(suggestedQuery.toString()));
    }
    channel.addElement(QName.get("status", hounderNs)).addText(Integer.toString(status));
    channel.addElement(QName.get("statusDesc", hounderNs)).addText(statusMessage);
    if (sr.lastDocumentOffset() > 0) {
        channel.addElement(QName.get("nextPage", hounderNs)).addText(baseUrl + "/" + opensearchSearcher
                + "?query=" + encodedQuery + "&start=" + (sr.lastDocumentOffset()) + extraParams);
    }

    for (int i = 0; i < sr.groups(); i++) {
        Vector<org.apache.lucene.document.Document> docs = sr.getGroup(i).last();
        Element parent = null;
        for (int j = 0; j < docs.size(); j++) {
            // Use the vector already fetched for this group rather than looking the group up again.
            org.apache.lucene.document.Document doc = docs.get(j);
            if (0 == j) {// j=0 is head of group. j>0 is tail
                parent = createAndAddElement(doc, channel, hounderNs);
            } else {
                // Tail documents are nested under the head item of their group.
                createAndAddElement(doc, parent, hounderNs);
            }
        }
    }
    return dom;
}

From source file:com.flaptor.hounder.searcher.OpenSearch.java

License:Apache License

/**
 * Appends an {@code item} element for the given Lucene document to {@code parent}.
 *
 * <p>The item gets {@code title}, {@code link} (prefixed with {@code linkPrefix}) and
 * {@code description} children taken from the document's {@code titleField},
 * {@code linkField} and {@code descField}; missing values are treated as empty and
 * an empty title falls back to the link. Every document field whose name is in
 * {@code fieldsToShow} is also added as an element in the hounder namespace.
 *
 * @param doc the Lucene document to render
 * @param parent the element the new item is appended to
 * @param hounderNs the namespace used for the extra fields
 * @return the newly created {@code item} element
 */
private static Element createAndAddElement(org.apache.lucene.document.Document doc, Element parent,
        Namespace hounderNs) {
    String link = StringUtil.nullToEmpty(doc.get(linkField)).trim();
    String description = StringUtil.nullToEmpty(doc.get(descField)).trim();
    String title = StringUtil.nullToEmpty(doc.get(titleField)).trim();
    if ("".equals(title)) {
        title = link;
    }

    Element item = parent.addElement("item");
    item.addElement("title").addText(DomUtil.filterXml(title));
    item.addElement("link").addText(linkPrefix + DomUtil.filterXml(link));
    // The leftover debugging dump of the description to System.out was removed:
    // it wrote five lines to stdout for every result item.
    item.addElement("description").addText(DomUtil.filterXml(description));

    for (Object candidate : doc.getFields()) {
        Field f = (Field) candidate;
        if (fieldsToShow.contains(f.name())) {
            item.addElement(QName.get(f.name(), hounderNs)).addText(DomUtil.filterXml(f.stringValue()));
        }
    }
    return item;
}

From source file:com.flaptor.hounder.searcher.XmlResults.java

License:Apache License

/**
 * Creates a XML search results document (verbose version).
 * The generated dom contains only valid xml characters (infringing chars are removed).
 * @param queryString the query string, as entered by the user
 * @param start the offset of the first result
 * @param count the number of results requested (the actual number of results found may be smaller);
 *          {@code startIndex} and {@code itemsPerPage} are only emitted when this is positive
 * @param orderBy the field by which the results are sorted
 * @param sr the GroupedSearchResults structure containing the result of performing the query
 * @param status the code returned by the searcher
 * @param statusMsg the status description
 * @param xsltUri the uri for the xslt used to process the xml on the client side, 
 *          or null if no client-side processing is needed
 * @param rangeField field for which a range filter will be applied, or null if no filter used.
 * @param rangeStart start value for the range filter.
 * @param rangeEnd end value for the range filter.
 * @param params a map of parameters sent to the searcher with the request. The first value of
 *          each parameter is added as an element named after the parameter, unless the root
 *          already has a matching node; parameters without a first value are skipped.
 * @return a DOM document
 * <br>An empty sr argument means that no results were found.
 */
public static final Document buildXml(String queryString, int start, int count, String orderBy,
        GroupedSearchResults sr, int status, String statusMsg, String xsltUri, String rangeField,
        String rangeStart, String rangeEnd, Map<String, String[]> params) {

    Document dom = DocumentHelper.createDocument();
    if (null != xsltUri) {
        Map<String, String> map = new HashMap<String, String>();
        map.put("type", "text/xsl");
        map.put("href", xsltUri);
        dom.addProcessingInstruction("xml-stylesheet", map);
    }
    Element root = dom.addElement("SearchResults");
    root.addElement("totalResults").addText(Integer.toString(sr.totalResults()));
    root.addElement("totalGroupsEstimation").addText(Integer.toString(sr.totalGroupsEstimation()));
    if (count > 0) {
        root.addElement("startIndex").addText(Integer.toString(start));
        root.addElement("itemsPerPage").addText(Integer.toString(count));
    }
    if (null != orderBy) {
        root.addElement("orderBy").addText(DomUtil.filterXml(orderBy));
    }
    if (null != queryString) {
        root.addElement("query").addText(DomUtil.filterXml(queryString));
    }
    if (null != rangeField) {
        root.addElement("filter").addAttribute("field", rangeField).addAttribute("start", rangeStart)
                .addAttribute("end", rangeEnd);
    }
    if (null != params) {
        for (Map.Entry<String, String[]> param : params.entrySet()) {
            String key = param.getKey();
            // Do not duplicate an element this method (or an earlier parameter) already added.
            if (null != root.selectSingleNode(key)) {
                continue;
            }
            String[] values = param.getValue();
            // Skip parameters with no usable first value instead of failing on values[0].
            if (null == values || 0 == values.length || null == values[0]) {
                continue;
            }
            root.addElement(key).addText(values[0]);
        }
    }
    AQuery suggestedQuery = sr.getSuggestedQuery();
    if (null != suggestedQuery) {
        root.addElement("suggestedQuery")
                .addText(DomUtil.filterXml(((LazyParsedQuery) suggestedQuery).getQueryString()));
    }
    root.addElement("status").addText(Integer.toString(status));
    root.addElement("statusDesc").addText(statusMsg);

    for (int i = 0; i < sr.groups(); i++) {
        String name = sr.getGroup(i).first();
        Element group = root.addElement("group").addAttribute("name", name);
        Vector<org.apache.lucene.document.Document> docs = sr.getGroup(i).last();
        // Iterate the vector already fetched for this group rather than looking it up per document.
        for (int j = 0; j < docs.size(); j++) {
            createAndAddElement(docs.get(j), group);
        }
    }
    return dom;
}