Example usage for org.dom4j Element addElement

List of usage examples for org.dom4j Element addElement

Introduction

On this page you can find example usages of the org.dom4j Element addElement method.

Prototype

Element addElement(String name);

Source Link

Document

Adds a new Element node with the given name to this branch and returns a reference to the new node.

Usage

From source file:com.fivepebbles.ProcessFiles.java

License:MIT License

/**
 * Writes the given bucket/file table to <code>target/s3files.xml</code>.
 *
 * <p>Each row of <code>valuesin</code> becomes one <code>bucket</code> element under the
 * <code>s3backup</code> root: index 0 of the row is the bucket name (stored in the
 * <code>name</code> attribute) and every further non-null entry becomes a <code>file</code>
 * child element.</p>
 *
 * <p>I/O failures are not propagated; the stack trace is printed instead.</p>
 *
 * @param valuesin rows of bucket name followed by its files/folders
 */
@Override
public void setFiles(String[][] valuesin) {

    // Build an XML document holding AWS bucket names and their related files/folders
    Document mydoc = DocumentHelper.createDocument();
    Element root = mydoc.addElement("s3backup");

    for (String[] row : valuesin) {
        // Reset per row so files are never attached to the previous row's bucket
        Element bucket = null;
        for (int col = 0; col < row.length; col++) {
            if (col == 0) {
                // Bucket names are in index 0
                bucket = root.addElement("bucket").addAttribute("name", row[col]);
            } else if (bucket != null && row[col] != null) {
                bucket.addElement("file").addText(row[col]);
            }
        }
    }

    // Save files and folders in file system.
    // try-with-resources closes the stream even when write() fails, and the explicit UTF-8
    // writer keeps the bytes on disk independent of the platform default charset.
    try (java.io.Writer xmlOut = new java.io.OutputStreamWriter(
            new java.io.FileOutputStream(new File("target", "s3files.xml")),
            java.nio.charset.StandardCharsets.UTF_8)) {
        mydoc.write(xmlOut);
        xmlOut.flush();
    } catch (IOException e) {
        //***TODO*** log message
        e.printStackTrace();
    }
}

From source file:com.flaptor.hounder.crawler.modules.IndexerModule.java

License:Apache License

/**
 * Builds a <code>documentDelete</code> document carrying the page's document id and submits
 * it to the indexer selected by the page mapper, resubmitting for as long as the indexer
 * answers <code>RETRY_QUEUE_FULL</code>. Once accepted, the page is flagged as not emitted.
 * Any exception raised along the way is logged and not rethrown.
 *
 * @param page the page to remove from the index
 */
private void deleteFromIndex(Page page) {
    org.dom4j.Document deleteRequest = DocumentHelper.createDocument();
    Element deleteRoot = deleteRequest.addElement("documentDelete");
    Element idElement = deleteRoot.addElement("documentId");
    idElement.addText(getDocumentId(page));

    try {
        int indexerSlot = pageMapper.mapPage(page);
        for (;;) {
            if (indexers[indexerSlot].index(deleteRequest) != IndexerReturnCode.RETRY_QUEUE_FULL) {
                break;
            }
            // Queue is full: pause before submitting the same request again.
            try {
                Thread.sleep(indexerBusyRetryTime * 1000);
            } catch (InterruptedException e) {
                logger.debug("Sleep interrupted: " + e, e);
            }
        }
        page.setEmitted(false);
    } catch (Exception e) {
        logger.error(e, e);
    }
}

From source file:com.flaptor.hounder.crawler.modules.IndexerModule.java

License:Apache License

/**
 * Builds a <code>documentAdd</code> dom4j document for the fetched document and submits it
 * to the indexer selected by the page mapper.
 *
 * <p>Documents with empty content are skipped with a warning. The document-level boost is
 * the product of the category, pagerank, spamrank, log and freshness factors and the
 * <code>"boost"</code> attribute; values below 1e-6 are logged and clamped to 1e-6. An empty
 * or null title is replaced by "Untitled". When the indexer answers
 * <code>RETRY_QUEUE_FULL</code> the same document is resubmitted after a pause. Once it is
 * accepted the page is flagged as emitted. Exceptions raised while submitting are logged and
 * not rethrown.</p>
 *
 * @param doc the fetched document to index
 */
@SuppressWarnings("unchecked")
protected void addToIndex(FetchDocument doc) {

    byte[] content = doc.getContent();
    if (0 == content.length) {
        logger.warn("Page has no data. Ignoring this document.");
        return;
    }

    Set<String> categories = doc.getCategories();
    Map<String, Object> attributes = doc.getAttributes();
    Map<String, Object> indexableAttributes = doc.getIndexableAttributes();

    // build xml doc
    org.dom4j.Document dom = DocumentHelper.createDocument();
    Element root = dom.addElement("documentAdd");
    Page page = doc.getPage();
    String text = doc.getText();
    String url = page.getUrl();
    String host = getHost(url);
    String title = doc.getTitle(titleLengthLimit);
    String tokenizedHost = tokenizeHost(host);
    String anchorText = getAnchorText(page);

    // raw per-signal boosts; each one is combined with its damp setting by factor() below
    float categoryBoost = calculateCategoryBoost(attributes);
    float pagerankBoost = calculatePagerankBoost(page);
    float spamrankBoost = calculateSpamrankBoost(page);
    float logBoost = calculateLogBoost(page);
    float freshnessBoost = calculateFreshnessBoost(page);

    // add overall score
    float f1 = factor("category", categoryBoost, categoryBoostDamp);
    float f2 = factor("pagerank", pagerankBoost, pagerankBoostDamp);
    float f3 = factor("spamrank", spamrankBoost, spamrankBoostDamp);
    float f4 = factor("log", logBoost, logBoostDamp);
    float f5 = factor("freshness", freshnessBoost, freshnessBoostDamp);
    // NOTE(review): the cast and dereference below throw if the "boost" attribute is missing
    // or is not a Double -- confirm it is always populated upstream.
    float f6 = ((Double) attributes.get("boost")).floatValue(); // as calculated by the boost module, or 1.0 if no boost module is defined.
    float boost = f1 * f2 * f3 * f4 * f5 * f6;

    // System.out.println("BOOST url=["+url+"]  category="+f1+" ("+categoryBoost+":"+categoryBoostDamp+")  pagerank="+f2+" ("+pagerankBoost+":"+pagerankBoostDamp+")  log="+f3+" ("+logBoost+":"+logBoostDamp+")  freshness="+f4+" ("+freshnessBoost+":"+freshnessBoostDamp+") moduleBoost="+f5+"  Boost="+boost);

    // clamp very small boosts to a fixed floor and report every factor that contributed
    if (boost < 1e-6f) {
        logger.warn("Boost too low! (" + boost + ")  category=" + f1 + " (" + categoryBoost + ":"
                + categoryBoostDamp + ")  pagerank=" + f2 + " (" + pagerankBoost + ":" + pagerankBoostDamp
                + ")  spamrank=" + f3 + " (" + spamrankBoost + ":" + spamrankBoostDamp + ")  log=" + f4 + " ("
                + logBoost + ":" + logBoostDamp + ")  freshness=" + f5 + " (" + freshnessBoost + ":"
                + freshnessBoostDamp + ") moduleBoost=" + f6);
        boost = 1e-6f;
    }

    if (null == title || "".equals(title)) {
        title = "Untitled";
    }

    root.addElement("boost").addText(String.valueOf(boost));
    root.addElement("documentId").addText(getDocumentId(page));

    // per-field boosts; addField falls back to 1.0 for names that are not in the map
    Map<String, Double> boostMap = (Map<String, Double>) attributes.get("field_boost");

    // add the search fields
    addField(root, "url", url, true, true, true, boostMap);
    addField(root, "site", host, true, true, false, boostMap);
    addField(root, "tokenizedHost", tokenizedHost, false, true, true, boostMap);
    addField(root, "title", title, true, true, true, boostMap);
    addField(root, "text", text, true, true, true, boostMap);
    addField(root, "anchor", anchorText, false, true, true, boostMap);
    addField(root, "crawl", crawlName, false, true, true, boostMap);

    if (sendContent) {
        addBody(root, doc, content);
    }

    // for debugging only
    //addField(root, "boostinfo", "category="+f1+" ("+categoryBoost+":"+categoryBoostDamp+")  pagerank="+f2+" ("+pagerankBoost+":"+pagerankBoostDamp+")  log="+f3+" ("+logBoost+":"+logBoostDamp+")  freshness="+f4+" ("+freshnessBoost+":"+freshnessBoostDamp+") moduleBoost="+f5+"  Boost="+boost, true, false, false, null);

    addAdditionalFields(root, page, boostMap);

    // Adding metainfo from attributes (null values are written as empty strings)
    Set<Entry<String, Object>> attributeSet = indexableAttributes.entrySet();
    for (Entry<String, Object> attribute : attributeSet) {
        addField(root, attribute.getKey(), attribute.getValue() == null ? "" : attribute.getValue().toString(),
                true, true, true, boostMap);
    }

    // categories are written as a single space-separated "categories" field
    StringBuffer assignedCategories = new StringBuffer();
    if (null != categories) {
        // iterate through the classes the page belongs to add each category and its score
        for (Iterator<String> iter = categories.iterator(); iter.hasNext();) {
            assignedCategories.append(iter.next());
            assignedCategories.append(" ");

            // repeat the field times proportional to the score (this is a way to boost the document by category);
            //for (int rep = 0; rep < score*10; rep++) {
            //    addField(root, "categoryBoost", categ, false, true, false);
            //}
        }
        addField(root, "categories", assignedCategories.toString().trim(), true, true, true, boostMap);
    }

    // guard avoids serializing the whole dom unless debug logging is on
    if (logger.isDebugEnabled()) {
        logger.debug("Indexing dom: " + DomUtil.domToString(dom));
    }
    // Send the document to the indexer. If the queue is full, wait and retry.
    try {
        int i = pageMapper.mapPage(page);
        while (indexers[i].index(dom) == IndexerReturnCode.RETRY_QUEUE_FULL) {
            try {
                Thread.sleep(indexerBusyRetryTime * 1000);
            } catch (InterruptedException e) {
                // an interrupt only cuts the pause short; the loop then retries immediately
                logger.debug("Sleep interrupted: " + e, e);
            }
        }
        page.setEmitted(true);
    } catch (Exception e) {
        logger.error(e, e);
    }
}

From source file:com.flaptor.hounder.crawler.modules.IndexerModule.java

License:Apache License

/**
 * Adds a new <code>field</code> element to the <code>doc</code> Element.
 *
 * <p>The field's boost is taken from <code>boostMap</code> under the field name. When the map
 * is null, has no entry for the name, or maps the name to null, the boost defaults to 1.0.</p>
 *
 * @param doc the element to add the field to
 * @param name the name of the field
 * @param value the String value for the field
 * @param stored true iff the field should be stored
 * @param indexed true iff the field should be indexed
 * @param tokenized true iff the field should be tokenized
 * @param boostMap map containing the boosts for each field name, may be null
 */
protected final void addField(Element doc, String name, String value, boolean stored, boolean indexed,
        boolean tokenized, Map<String, Double> boostMap) {
    // Single lookup; a null-mapped entry is treated like a missing one instead of
    // failing later on boost.toString().
    Double boost = (null == boostMap) ? null : boostMap.get(name);
    if (null == boost) {
        boost = 1.0d;
    }
    doc.addElement("field").addAttribute("name", name).addAttribute("stored", Boolean.toString(stored))
            .addAttribute("indexed", Boolean.toString(indexed))
            .addAttribute("tokenized", Boolean.toString(tokenized)).addAttribute("boost", boost.toString())
            .addText(value);
}

From source file:com.flaptor.hounder.crawler.modules.IndexerModule.java

License:Apache License

/**
 * Adds a <code>body</code> element holding <code>bytes</code> decoded to text.
 *
 * <p>The charset is read from the <code>content-type</code> header (for example
 * <code>Content-Type: text/html; charset=UTF-8</code>). When no usable charset parameter is
 * present, the platform default charset is used. If the named charset is not supported, an
 * error is logged and no body element is added.</p>
 *
 * @param doc the element to add the body to
 * @param fetchDoc the fetched document whose headers are inspected
 * @param bytes the raw content to decode
 */
protected final void addBody(Element doc, FetchDocument fetchDoc, byte[] bytes) {
    String encoding = null;
    // find charset. http headers usually have a Content-Type line, but
    // as it may not be in the same case, all headers are stored lowercased.
    // Content-Type lines contain mime-type and charset, separated by ;
    if (fetchDoc.getHeader().containsKey("content-type")) {
        String contentType = fetchDoc.getHeader().get("content-type");
        if (null != contentType) {
            for (String token : contentType.split(";")) {
                int eq = token.indexOf('=');
                // Locale.ROOT keeps the case mapping independent of the JVM's default locale.
                if (eq < 0 || !token.substring(0, eq).toLowerCase(java.util.Locale.ROOT).contains("charset")) {
                    continue;
                }
                String value = token.substring(eq + 1).trim();
                // the parameter value may be quoted: charset="UTF-8"
                if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                    value = value.substring(1, value.length() - 1).trim();
                }
                // an empty value ("charset=") is ignored rather than indexed past the array end
                if (value.length() > 0) {
                    encoding = value.toUpperCase(java.util.Locale.ROOT);
                    break;
                }
            }
        }
    }
    // if not found, use default encoding
    if (null == encoding) {
        encoding = java.nio.charset.Charset.defaultCharset().name();
    }

    try {
        doc.addElement("body").addText(new String(bytes, encoding));
    } catch (java.io.UnsupportedEncodingException e) {
        logger.error("while adding body: ", e);
    }
}

From source file:com.flaptor.hounder.indexer.RmiIndexerStub.java

License:Apache License

/**
 * Builds a <code>documentDelete</code> document whose single <code>documentId</code> child
 * carries the given url as text.
 *
 * @param url the text to store in the documentId element
 * @return the newly created document
 */
private static Document generateDeleteDocument(String url) {
    org.dom4j.Document deleteRequest = DocumentHelper.createDocument();
    Element idElement = deleteRequest.addElement("documentDelete").addElement("documentId");
    idElement.addText(url);
    return deleteRequest;
}

From source file:com.flaptor.hounder.indexer.SanitizerModule.java

License:Apache License

/**
 * Manual check entry point: wraps the first command-line argument in a
 * <code>documentAdd</code> document (once as a <code>text</code> element and once as a
 * <code>field</code> named "text"), runs it through a SanitizerModule and prints every
 * resulting document to standard output.
 *
 * @param args args[0] is the text to process
 */
public static void main(String[] args) {
    String rawText = args[0];
    Document input = DocumentHelper.createDocument();
    Element addRoot = input.addElement("documentAdd");

    Element textElement = addRoot.addElement("text");
    textElement.addText(rawText);

    Element field = addRoot.addElement("field");
    field.addAttribute("name", "text");
    field.addAttribute("indexed", "true");
    field.addAttribute("stored", "true");
    field.addAttribute("tokenized", "true");
    field.addText(rawText);

    SanitizerModule sanitizer = new SanitizerModule();
    Document[] results = sanitizer.internalProcess(input);
    for (int k = 0; k < results.length; k++) {
        System.out.println(DomUtil.domToString(results[k]));
    }
}

From source file:com.flaptor.hounder.searcher.OpenSearch.java

License:Apache License

/**
 * Builds an OpenSearch-compatible DOM document for a set of grouped search results.
 * Query-derived text is passed through <code>DomUtil.filterXml</code> before being added.
 * Compliant with OpenSearch 1.0 with most of the Nutch 0.8.1 extensions.
 *
 * @param baseUrl the url of the webapp
 * @param htmlSearcher the name of the component (servlet/jsp) that returns the search results in an HTML page
 * @param opensearchSearcher the name of the component (servlet/jsp) that returns the search results in an OpenSearch RSS page
 * @param extraParams the parameters present in the request, not passed explicitly (such as sort, reverse, etc.)
 * @param queryString the query string, as entered by the user
 * @param start the offset of the first result
 * @param count the number of results requested (the actual number of results found may be smaller)
 * @param sr the SearchResults structure containing the result of performing the query
 * @param status the status code, written to the hounder <code>status</code> element
 * @param statusMessage the status text, written to the hounder <code>statusDesc</code> element
 * @param useXslt when true, an <code>xml-stylesheet</code> processing instruction is added and the
 *          root element is <code>searchResults</code>; otherwise the root is <code>rss</code>
 *          with a <code>channel</code> child
 * @return a DOM document
 * <br>An empty sr argument means that no results were found.
 */
public static final Document buildDom_1_0(String baseUrl, String htmlSearcher, String opensearchSearcher,
        String extraParams, String queryString, int start, int count, GroupedSearchResults sr, int status,
        String statusMessage, boolean useXslt) {

    String encodedQuery;
    try {
        encodedQuery = URLEncoder.encode(queryString, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        // Not expected for UTF-8; fall back to an empty query in the generated links.
        encodedQuery = "";
    }

    Document dom = DocumentHelper.createDocument();
    if (useXslt) {
        Map<String, String> stylesheetAttrs = new HashMap<String, String>();
        stylesheetAttrs.put("type", "text/xsl");
        stylesheetAttrs.put("href", xsltPath);
        dom.addProcessingInstruction("xml-stylesheet", stylesheetAttrs);
    }

    Namespace opensearchNs = DocumentHelper.createNamespace("opensearch", XMLNS_A9_OPENSEARCH_1_0);
    Namespace hounderNs = DocumentHelper.createNamespace("hounder", XMLNS_HOUNDER_OPENSEARCH_1_0);

    // With XSLT the channel doubles as the root; without it the channel sits under <rss>.
    Element root;
    Element channel;
    if (useXslt) {
        channel = dom.addElement("searchResults");
        root = channel;
    } else {
        root = dom.addElement("rss").addAttribute("version", "2.0");
        channel = root.addElement("channel");
    }
    root.add(opensearchNs);
    root.add(hounderNs);

    // Channel header
    Element titleElement = channel.addElement("title");
    titleElement.addText(titlePrefix + " " + DomUtil.filterXml(queryString));

    Element linkElement = channel.addElement("link");
    linkElement.addText(baseUrl + "/" + htmlSearcher + "?query=" + encodedQuery + "&start=" + start + extraParams);

    Element descriptionElement = channel.addElement("description");
    descriptionElement.addText(descPrefix + " " + DomUtil.filterXml(queryString));

    // OpenSearch paging information
    Element totalElement = channel.addElement(QName.get("totalResults", opensearchNs));
    totalElement.addText(Integer.toString(sr.totalGroupsEstimation()));
    Element startElement = channel.addElement(QName.get("startIndex", opensearchNs));
    startElement.addText(Integer.toString(start));
    Element perPageElement = channel.addElement(QName.get("itemsPerPage", opensearchNs));
    perPageElement.addText(Integer.toString(count));

    // Hounder extensions
    Element queryElement = channel.addElement(QName.get("query", hounderNs));
    queryElement.addText(DomUtil.filterXml(queryString));

    AQuery suggestedQuery = sr.getSuggestedQuery();
    if (suggestedQuery != null) {
        Element suggestedElement = channel.addElement(QName.get("suggestedQuery", hounderNs));
        suggestedElement.addText(DomUtil.filterXml(suggestedQuery.toString()));
    }

    Element statusElement = channel.addElement(QName.get("status", hounderNs));
    statusElement.addText(Integer.toString(status));
    Element statusDescElement = channel.addElement(QName.get("statusDesc", hounderNs));
    statusDescElement.addText(statusMessage);

    if (sr.lastDocumentOffset() > 0) {
        Element nextPageElement = channel.addElement(QName.get("nextPage", hounderNs));
        nextPageElement.addText(baseUrl + "/" + opensearchSearcher
                + "?query=" + encodedQuery + "&start=" + (sr.lastDocumentOffset()) + extraParams);
    }

    // Result items: the first document of each group hangs off the channel,
    // the remaining documents of the group are nested under that first item.
    for (int g = 0; g < sr.groups(); g++) {
        Vector<org.apache.lucene.document.Document> members = sr.getGroup(g).last();
        Element groupHead = null;
        for (int m = 0; m < members.size(); m++) {
            org.apache.lucene.document.Document hit = sr.getGroup(g).last().get(m);
            Element target = (m == 0) ? channel : groupHead;
            Element added = createAndAddElement(hit, target, hounderNs);
            if (m == 0) {
                groupHead = added;
            }
        }
    }
    return dom;
}

From source file:com.flaptor.hounder.searcher.OpenSearch.java

License:Apache License

/**
 * Appends an <code>item</code> element describing one Lucene document to <code>parent</code>.
 *
 * <p>The item gets <code>title</code>, <code>link</code> and <code>description</code> children
 * built from the configured title, link and description fields. Missing values are treated
 * as empty strings and an empty title falls back to the link. Every document field whose
 * name is listed in <code>fieldsToShow</code> is additionally written as an element in the
 * hounder namespace. All text is passed through <code>DomUtil.filterXml</code>.</p>
 *
 * @param doc the Lucene document to describe
 * @param parent the element the new item is appended to
 * @param hounderNs the namespace used for the extra field elements
 * @return the newly created item element
 */
private static Element createAndAddElement(org.apache.lucene.document.Document doc, Element parent,
        Namespace hounderNs) {
    String link = StringUtil.nullToEmpty(doc.get(linkField)).trim();
    String description = StringUtil.nullToEmpty(doc.get(descField)).trim();
    String title = StringUtil.nullToEmpty(doc.get(titleField)).trim();
    if ("".equals(title)) {
        title = link;
    }

    Element item = parent.addElement("item");
    item.addElement("title").addText(DomUtil.filterXml(title));
    item.addElement("link").addText(linkPrefix + DomUtil.filterXml(link));
    // The leftover debug banners that dumped every description to standard output were removed.
    item.addElement("description").addText(DomUtil.filterXml(description));

    for (Object entry : doc.getFields()) {
        Field f = (Field) entry;
        if (fieldsToShow.contains(f.name())) {
            item.addElement(QName.get(f.name(), hounderNs)).addText(DomUtil.filterXml(f.stringValue()));
        }
    }
    return item;
}

From source file:com.flaptor.hounder.searcher.XmlResults.java

License:Apache License

/**
 * Creates a XML search results document (verbose version).
 * Query-derived text (order-by field, query, suggested query) is passed through
 * <code>DomUtil.filterXml</code> before being added.
 *
 * @param queryString the query string, as entered by the user
 * @param start the offset of the first result
 * @param count the number of results requested (the actual number of results found may be smaller);
 *          <code>startIndex</code> and <code>itemsPerPage</code> are only written when it is positive
 * @param orderBy the field by which the results are sorted
 * @param sr the GroupedSearchResults structure containing the result of performing the query
 * @param status the code returned by the searcher
 * @param statusMsg the status description
 * @param xsltUri the uri for the xslt used to process the xml on the client side,
 *          or null if no client-side processing is needed
 * @param rangeField field for which a range filter will be applied, or null if no filter used.
 * @param rangeStart start value for the range filter.
 * @param rangeEnd end value for the range filter.
 * @param params a map of parameters sent to the searcher with the request. The first value of
 *          each parameter is written as a child element named after the parameter, unless the
 *          root already has a child element with that name; parameters without values are skipped.
 * @return a DOM document
 * <br>An empty sr argument means that no results were found.
 */
public static final Document buildXml(String queryString, int start, int count, String orderBy,
        GroupedSearchResults sr, int status, String statusMsg, String xsltUri, String rangeField,
        String rangeStart, String rangeEnd, Map<String, String[]> params) {

    Document dom = DocumentHelper.createDocument();
    if (null != xsltUri) {
        Map<String, String> map = new HashMap<String, String>();
        map.put("type", "text/xsl");
        map.put("href", xsltUri);
        dom.addProcessingInstruction("xml-stylesheet", map);
    }
    Element root = dom.addElement("SearchResults");
    root.addElement("totalResults").addText(Integer.toString(sr.totalResults()));
    root.addElement("totalGroupsEstimation").addText(Integer.toString(sr.totalGroupsEstimation()));
    if (count > 0) {
        root.addElement("startIndex").addText(Integer.toString(start));
        root.addElement("itemsPerPage").addText(Integer.toString(count));
    }
    if (null != orderBy) {
        root.addElement("orderBy").addText(DomUtil.filterXml(orderBy));
    }
    if (null != queryString) {
        root.addElement("query").addText(DomUtil.filterXml(queryString));
    }
    if (null != rangeField) {
        root.addElement("filter").addAttribute("field", rangeField).addAttribute("start", rangeStart)
                .addAttribute("end", rangeEnd);
    }
    if (null != params) {
        for (Map.Entry<String, String[]> param : params.entrySet()) {
            String key = param.getKey();
            String[] values = param.getValue();
            if (null == values || 0 == values.length) {
                continue;
            }
            // Look the child up by element name. selectSingleNode(key) would evaluate the
            // request parameter name as an XPath expression.
            // NOTE(review): key is used verbatim as an element name; presumably callers only
            // pass XML-safe parameter names -- confirm.
            if (null == root.element(key)) {
                root.addElement(key).addText(values[0]);
            }
        }
    }
    AQuery suggestedQuery = sr.getSuggestedQuery();
    if (null != suggestedQuery) {
        root.addElement("suggestedQuery")
                .addText(DomUtil.filterXml(((LazyParsedQuery) suggestedQuery).getQueryString()));
    }
    root.addElement("status").addText(Integer.toString(status));
    root.addElement("statusDesc").addText(statusMsg);

    // One <group> element per result group, holding one entry per document of the group.
    for (int i = 0; i < sr.groups(); i++) {
        String name = sr.getGroup(i).first();
        Element group = root.addElement("group").addAttribute("name", name);
        Vector<org.apache.lucene.document.Document> docs = sr.getGroup(i).last();
        for (int j = 0; j < docs.size(); j++) {
            org.apache.lucene.document.Document doc = sr.getGroup(i).last().get(j);
            createAndAddElement(doc, group);
        }
    }
    return dom;
}