Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes Element attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.attr.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:no.kantega.publishing.admin.content.htmlfilter.ImgHeightAndWidthFilter.java

/**
 * Adjusts the {@code width}, {@code height} and {@code src} attributes of every
 * {@code <img>} element whose declared size differs from the size of the stored image.
 *
 * <p>Only images that carry both a non-blank {@code width} and {@code height} attribute,
 * and whose {@code src} yields exactly one id from
 * {@code MultimediaHelper.getMultimediaIdsFromText}, are touched. For those the dimensions
 * computed by {@code imageEditor.getResizedImageDimensions} are written back and the
 * {@code src} is replaced by the image URL with a {@code width=} query parameter appended.
 *
 * @param document the document to filter; it is modified in place
 * @return the same {@code document} instance
 */
@Override
public Document runFilter(Document document) {
    if (multimediaDao == null) {
        // Collaborators are fetched from the application context on first use.
        ApplicationContext context = RootContext.getInstance();
        multimediaDao = context.getBean(MultimediaDao.class);
        imageEditor = context.getBean(ImageEditor.class);
    }

    for (Element img : document.getElementsByTag("img")) {
        String width = img.attr("width");
        String height = img.attr("height");
        if (isNotBlank(width) && isNotBlank(height)) {
            try {
                int imageWidth = Integer.parseInt(width);
                int imageHeight = Integer.parseInt(height);

                String url = img.attr("src");
                if (url != null) {
                    List<Integer> ids = MultimediaHelper.getMultimediaIdsFromText(url);
                    if (ids.size() == 1) {
                        int multimediaId = ids.get(0);
                        Multimedia image = multimediaDao.getMultimedia(multimediaId);
                        // The id comes from free-form HTML, so it may not resolve to a stored
                        // object; skip the element instead of failing the whole filter run.
                        // NOTE(review): assumes getMultimedia returns null for unknown ids - confirm.
                        if (image != null
                                && (imageWidth != image.getWidth() || imageHeight != image.getHeight())) {
                            MultimediaDimensions d = imageEditor.getResizedImageDimensions(image.getWidth(),
                                    image.getHeight(), imageWidth, imageHeight);
                            img.attr("height", String.valueOf(d.getHeight()));
                            img.attr("width", String.valueOf(d.getWidth()));
                            String imageUrl = image.getUrl();
                            // Append with '&' when the URL already has a query string.
                            img.attr("src",
                                    imageUrl + (imageUrl.contains("?") ? "&" : "?") + "width=" + d.getWidth());
                        }
                    }
                }
            } catch (NumberFormatException e) {
                // width/height were not plain integers (e.g. "50%"); leave the element untouched.
                log.error("Could not parse number", e);
            }
        }
    }
    return document;
}

From source file:no.kantega.publishing.admin.content.htmlfilter.ReplaceStyleAlignWithAttributeAlignFilter.java

/**
 * Replaces the inline {@code style} attribute of the configured tags with an
 * {@code align} attribute.
 *
 * <p>For every element of every tag name in {@code tags} that has a non-blank
 * {@code style}: if the style text contains "right", "left" or "center" (checked in
 * that order, first match wins) the matching {@code align} value is set; the
 * {@code style} attribute is then removed whether or not a keyword matched.
 *
 * @param document the document to filter; it is modified in place
 * @return the same {@code document} instance
 */
@Override
public Document runFilter(Document document) {
    for (String tagName : tags) {
        for (Element candidate : document.getElementsByTag(tagName)) {
            String inlineStyle = candidate.attr("style");
            if (!isNotBlank(inlineStyle)) {
                continue;
            }

            // NOTE(review): plain substring match, so e.g. "margin-right" also counts as "right".
            String alignment = null;
            if (inlineStyle.contains("right")) {
                alignment = "right";
            } else if (inlineStyle.contains("left")) {
                alignment = "left";
            } else if (inlineStyle.contains("center")) {
                alignment = "center";
            }

            if (alignment != null) {
                candidate.attr("align", alignment);
            }
            candidate.removeAttr("style");
        }
    }
    return document;
}

From source file:no.kantega.publishing.modules.linkcheck.crawl.LinkExtractor.java

/**
 * Reports every link found in a single content attribute to the given handler.
 *
 * <ul>
 *   <li>{@code HtmltextAttribute}: the value is parsed as HTML and the {@code href} of each
 *       {@code a[href]} element is reported.</li>
 *   <li>{@code UrlAttribute}: a non-empty value is reported, prefixed with
 *       {@code Aksess.VAR_WEB} when it starts with "/".</li>
 *   <li>{@code FileAttribute} / {@code MediaAttribute}: a non-blank value is parsed as an
 *       integer id and reported as an {@code attachment.ap} / {@code multimedia.ap} link.</li>
 *   <li>{@code RepeaterAttribute}: every contained attribute is handled recursively.</li>
 * </ul>
 *
 * Failures are logged (including the causing exception) and do not propagate.
 *
 * @param content the content the attribute belongs to
 * @param linkHandler receiver of the links that are found
 * @param attribute the attribute to inspect
 */
private void handleAttribute(Content content, LinkHandler linkHandler, Attribute attribute) {
    // Prefer the title as display name; fall back to the technical name.
    String attrName = (isNotBlank(attribute.getTitle())) ? attribute.getTitle() : attribute.getName();
    if (attribute instanceof HtmltextAttribute) {
        String html = attribute.getValue();
        try {
            if (html != null) {
                Elements links = Jsoup.parse(html).select("a[href]");
                for (Element link : links) {
                    String href = link.attr("href");
                    linkHandler.attributeLinkFound(content, href, attrName);
                }
            }
        } catch (Throwable e) {
            eventLog.log("LinkExtractor", "localhost", Event.FAILED_LINK_EXTRACT,
                    String.format("Failed to extract links from %s", content.getUrl()), content);
            // The trailing throwable is logged with its stack trace.
            log.error("contentId: {}, associationid: {}, attribute: {} {}", content.getId(),
                    content.getAssociation().getId(), attrName, html, e);
        }
    } else if (attribute instanceof UrlAttribute) {
        String link = attribute.getValue();
        if (link != null && link.length() > 0) {
            if (link.startsWith("/")) {
                link = Aksess.VAR_WEB + link;
            }
            linkHandler.attributeLinkFound(content, link, attrName);
        }
    } else if (attribute instanceof FileAttribute && isNotBlank(attribute.getValue())) {
        try {
            int attachmentId = Integer.parseInt(attribute.getValue());
            String link = Aksess.VAR_WEB + "/attachment.ap?id=" + attachmentId;
            linkHandler.attributeLinkFound(content, link, attrName);
        } catch (Exception e) {
            log.error("Error getting Content({}) FileAttribute {} with value {}", content.getId(),
                    attribute.getName(), attribute.getValue(), e);
        }
    } else if (attribute instanceof MediaAttribute && isNotBlank(attribute.getValue())) {
        try {
            int mediaId = Integer.parseInt(attribute.getValue());
            String link = Aksess.VAR_WEB + "/multimedia.ap?id=" + mediaId;
            linkHandler.attributeLinkFound(content, link, attrName);
        } catch (Exception e) {
            log.error("Error getting Content({}) MediaAttribute {} with value {}", content.getId(),
                    attribute.getName(), attribute.getValue(), e);
        }
    } else if (attribute instanceof RepeaterAttribute) {
        RepeaterAttribute repeaterAttribute = (RepeaterAttribute) attribute;
        for (List<Attribute> attributes : repeaterAttribute) {
            for (Attribute a : attributes) {
                handleAttribute(content, linkHandler, a);
            }
        }
    }
}

From source file:noThreads.Menu.java

/**
 * Shows the main console menu, reads the user's selection from standard input and adds
 * the link(s) belonging to that selection to {@code theUrls}.
 *
 * <p>Options: 1 = all location links of the start page, 2 = one category, 3 = one
 * location, 4 = one entry of the ratings menu (possibly via further sub-menus),
 * 5 = exit. Any invalid selection, a failed page load or a read error terminates the
 * JVM via {@code System.exit}.
 *
 * <p>Input that is not a number (or end of input) is treated as an invalid selection.
 * A single reader is used for the whole dialogue so that input buffered by one prompt
 * is still available to the next one.
 *
 * <p>NOTE(review): {@code print} is assumed to be a {@code String.format}-style helper
 * ({@code print(String format, Object... args)}) - confirm against its definition.
 *
 * @throws IOException kept from the original signature
 * @throws InterruptedException kept from the original signature
 */
public void createMenu() throws IOException, InterruptedException {
    System.out.print("******************** Menu Options ******************** "
            + "\n1. Get a playlist for all the stations at <e-radio.gr>"
            + "\n2. View the available station Categories and get a playlist."
            + "\n3. View the available station Locations and get a playlist."
            + "\n4. View the station Ratings (Top) and get a playlist." + "\n5. Exit." + "\n\n"
            + "Please make a choice (1-5): ");

    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    choice = readChoice(br);

    switch (choice) {
    case (1):
        addAllStationLinks();
        break;

    case (2):
        chooseCategory(br);
        break;

    case (3):
        chooseLocation(br);
        break;

    case (4):
        chooseRating(br);
        break;

    case (5):
        System.out.println("Exiting program...");
        System.exit(0);
        break;

    default:
        System.out.println("Invalid choice! Exiting...");
        System.exit(1);
        break;
    }
}

/** Reads one line and parses it as a number; returns -1 for non-numeric input or end of input. */
private int readChoice(BufferedReader in) {
    try {
        String line = in.readLine();
        if (line == null) {
            return -1;
        }
        return Integer.parseInt(line.trim());
    } catch (NumberFormatException e) {
        // -1 is outside every menu range, so callers report it as a wrong selection.
        return -1;
    } catch (IOException e) {
        System.out.println("Error!");
        System.exit(1);
        return -1;
    }
}

/** Prints the "(1-max)" prompt and returns the number the user entered (see readChoice). */
private int promptForChoice(BufferedReader in, int max) {
    System.out.print("\n" + "Please make a choice (1-" + max + "): ");
    return readChoice(in);
}

/** Prints the message followed by "Exiting program..." and terminates with status 1. */
private void exitWithError(String message) {
    System.out.println(message);
    System.out.println("Exiting program...");
    System.exit(1);
}

/** Loads the start page ({@code URL}); terminates the program when it cannot be loaded. */
private Document loadStartPage() throws IOException, InterruptedException {
    Document page = parseUrl(URL, 0);
    if (page == null) {
        print("No connection to the server! Exiting...");
        System.exit(1);
    }
    return page;
}

/** Prints the unescaped HTML of each entry as a numbered list starting at 1. */
private void printNumbered(Elements entries) {
    for (int i = 0; i < entries.size(); i++) {
        System.out.println(i + 1 + ".  " + StringEscapeUtils.unescapeHtml4(entries.get(i).html()));
    }
}

/** Option 1: adds every location link of the start page to {@code theUrls}. */
private void addAllStationLinks() throws IOException, InterruptedException {
    Document doc = loadStartPage();
    Elements links = doc.select("div[id=paneContainer]").select("a[href*=/locations/]");

    for (Element link : links) {
        theUrls.add(link.attr("abs:href"));
    }
    System.out.println("...Processing <All e-radio> station links");
}

/** Option 2: lists the category links and adds the chosen one to {@code theUrls}. */
private void chooseCategory(BufferedReader in) throws IOException, InterruptedException {
    Document doc = loadStartPage();
    Elements categoryLinks = doc.select("div[id=paneContainer]").select("a[href*=/categories/]");

    System.out.println("E-radio stations available categories: " + "\n");
    printNumbered(categoryLinks);

    choice = promptForChoice(in, categoryLinks.size());
    if (choice >= 1 && choice <= categoryLinks.size()) {
        Element chosen = categoryLinks.get(choice - 1);
        theUrls.add(chosen.attr("abs:href"));
        System.out.println("...Processing the <"
                + StringEscapeUtils.unescapeHtml4(chosen.html()) + "> category");
    } else {
        exitWithError("Wrong selection...");
    }
}

/** Option 3: lists the location links and adds the chosen one to {@code theUrls}. */
private void chooseLocation(BufferedReader in) throws IOException, InterruptedException {
    Document doc = loadStartPage();
    Elements locationLinks = doc.select("div[id=paneContainer]").select("a[href*=/locations/]");

    System.out.println("E-radio stations available locations: " + "\n");
    printNumbered(locationLinks);

    choice = promptForChoice(in, locationLinks.size());
    if (choice >= 1 && choice <= locationLinks.size()) {
        Element chosen = locationLinks.get(choice - 1);
        theUrls.add(chosen.attr("abs:href"));
        System.out.println("...Processing <"
                + StringEscapeUtils.unescapeHtml4(chosen.html()) + "> location");
    } else {
        exitWithError("Wrong selection!");
    }
}

/** Option 4: lists the ratings menu and follows the chosen entry, including its sub-menus. */
private void chooseRating(BufferedReader in) throws IOException, InterruptedException {
    final int YEARLY_RATING = 10;
    Document doc = loadStartPage();
    Elements ratingsMenu = doc.select("div[class=menuFly]").select("li").select("a[class=hide]");

    print("\nStations ratings: \n");
    printNumbered(ratingsMenu);

    choice = promptForChoice(in, ratingsMenu.size());
    if (choice < 1 || choice > ratingsMenu.size()) {
        exitWithError("Wrong selection!");
        return;
    }

    /*
     * The html of the Ratings menu processed
     * has this structure:
     * <div>
     *    <ul>
     *       <li>
     *          <ul>
     *             ...
     *          </ul>
     *       </li>
     *       ...
     *    </ul>
     * </div>
     */
    //Get the DIV element with class "menuFly"
    Elements div = doc.select("div[class=menuFly]");
    //div Elements list has only one element. So get the children of div
    Elements ul = div.get(0).children();
    //ul Elements list has only one element. So get the children of ul
    Elements li = ul.get(0).children();

    //remove blank elements; walk backwards so a removal never shifts an unvisited index
    for (int j = li.size() - 1; j >= 0; j--) {
        if (!li.get(j).hasText()) {
            li.remove(j);
        }
    }

    //get the title of user choice and print it out
    String chosenTitle = StringEscapeUtils.unescapeHtml4(ratingsMenu.get(choice - 1).html());
    print("\n%s", chosenTitle + "\n");

    // NOTE(review): relies on the cleaned <li> list lining up index-for-index with ratingsMenu.
    Element chosenItem = li.get(choice - 1);

    //check if there is a sub-menu
    Elements ulTag = chosenItem.select("ul");
    if (ulTag.hasText()) {
        chooseFromRatingSubMenu(in, ulTag.select("li").select("a[href]"));
    } else if (choice == YEARLY_RATING) {
        chooseYearlyRating(in, chosenItem.select("a[href]").attr("abs:href"));
    } else {
        String url = chosenItem.select("a[href]").attr("abs:href");
        theUrls.add(url);
        System.out.println("...Processing the <" + chosenTitle + "> category");
        // The URL is passed as an argument so that '%' in it is not read as a format directive.
        print("%s", url);
    }
}

/** Lists the links of a ratings sub-menu and adds the chosen one to {@code theUrls}. */
private void chooseFromRatingSubMenu(BufferedReader in, Elements subMenu) {
    //print the sub-menu
    for (int j = 0; j < subMenu.size(); j++) {
        print("%s.  %s ", j + 1, StringEscapeUtils.unescapeHtml4(subMenu.get(j).html()));
    }

    choice = promptForChoice(in, subMenu.size());
    if (choice >= 1 && choice <= subMenu.size()) {
        Element chosen = subMenu.get(choice - 1);
        theUrls.add(chosen.attr("abs:href"));
        System.out.println("...Processing the <"
                + StringEscapeUtils.unescapeHtml4(chosen.html()) + "> category");
    } else {
        exitWithError("Wrong selection!");
    }
}

/** Loads the yearly-rating page, lists its tabs and handles the tab the user picks. */
private void chooseYearlyRating(BufferedReader in, String url) throws IOException, InterruptedException {
    Document doc = parseUrl(url, 0);
    if (doc == null) {
        exitWithError("ERROR: Cannot get links from server!");
        return;
    }

    Elements yearTopSubMenu = doc.select("div[id=maintabsid]").select("a[href]");

    //print the sub-menu
    for (int i = 0; i < yearTopSubMenu.size(); i++) {
        print("%s.  %s", i + 1, StringEscapeUtils.unescapeHtml4(yearTopSubMenu.get(i).html()));
    }

    choice = promptForChoice(in, yearTopSubMenu.size());
    if (choice < 1 || choice > yearTopSubMenu.size()) {
        exitWithError("Wrong selection!");
        return;
    }

    Element tab = yearTopSubMenu.get(choice - 1);
    if (choice == 1) {
        theUrls.add(tab.attr("abs:href"));
        print("...Processing the <%s> category", StringEscapeUtils.unescapeHtml4(tab.html()));
    } else {
        // Tab 2 filters by category; every later tab is handled as a location filter.
        chooseYearlyFilter(in, tab, choice == 2);
    }
}

/**
 * Lists the options of the tab's "selectoption" drop-down and adds the tab link, extended
 * with the id of the chosen option, to {@code theUrls}.
 */
private void chooseYearlyFilter(BufferedReader in, Element tab, boolean categoryTab)
        throws IOException, InterruptedException {
    String optionPrefix = categoryTab ? "Select category: " : "Select location: ";
    String link = tab.attr("abs:href");
    Document doc = parseUrl(link, 0);

    //print menu title
    print("\n%s", StringEscapeUtils.unescapeHtml4(tab.html()) + "\n");

    if (doc == null) {
        exitWithError("ERROR: Cannot get links from server!");
        return;
    }

    Elements elem = doc.select("select[id=selectoption]").select("option[value]");
    ArrayList<Integer> nums = new ArrayList<Integer>();

    for (int i = 0; i < elem.size(); i++) {
        //get the select option values, remember them and print the sub-menu
        nums.add(Integer.parseInt(elem.get(i).attr("value")));
        print("%s.  %s", i + 1,
                StringEscapeUtils.unescapeHtml4(elem.get(i).html().replace(optionPrefix, "")));
    }

    choice = promptForChoice(in, elem.size());
    if (choice < 1 || choice > elem.size()) {
        exitWithError("Wrong selection!");
        return;
    }

    int num = nums.get(choice - 1);
    String finalLink;
    if (categoryTab) {
        // Insert the id right after the "max=100&" parameter.
        finalLink = link.replace("max=100&", "max=100&id=" + num + "&");
    } else {
        // Replace the third '&'-separated part of the link with the id parameter.
        // NOTE(review): assumes the link has at least four '&'-separated parts - confirm.
        String[] linkParts = link.split("&", 4);
        finalLink = linkParts[0] + "&" + linkParts[1] + "&" + "id=" + num + "&" + linkParts[3];
    }
    theUrls.add(finalLink);

    Element chosen = elem.get(choice - 1);
    System.out.println("...Processing the <"
            + StringEscapeUtils.unescapeHtml4(chosen.html().replace(optionPrefix, "")) + "> category");
    print("%s", chosen.select("a[href]").attr("abs:href"));
}

From source file:noThreads.ParseLevel2.java

/**
 *
 * @param theLinks/*  ww w .  ja  v  a2 s.  co  m*/
 * @throws IOException
 */
public void getSecondLinks(ArrayList<String> theLinks) throws IOException {
    float num = 0;
    String temp, attrOfScr, subString;
    Document doc;
    boolean flag;
    for (String sLink : theLinks) {
        if ((sLink.endsWith(".asx") == true) || (sLink.endsWith(".swf") == true)) {
            stationLinks2.add(sLink);
            print("Written to file: %s", sLink);
        } else {
            //iframeCase(sLink);

            doc = parseUrl(sLink, 0);
            if (doc != null) {
                Elements media = doc.select("[src]");
                print("Fetching %s -->  ", sLink);
                flag = false;
                for (Element src : media) {
                    if (src.tagName().equals("embed") == true) {
                        flag = true;
                        temp = src.attr("abs:src");
                        if (temp.endsWith(".swf") == true) {
                            attrOfScr = src.attr("abs:flashvars");
                            //                                                    System.out.println("\nThis is src of embed tag: "
                            //                                                            +temp
                            //                                                            +"\nThis is attribute flashvars of embed tag: "
                            //                                                            +attrOfScr);
                            int start = attrOfScr.indexOf("http://", attrOfScr.indexOf("http://") + 1);
                            int end = attrOfScr.indexOf("&");
                            char a_char = attrOfScr.charAt(end - 1);

                            if (start != -1 && end != -1) {
                                if (a_char == ';') {
                                    subString = attrOfScr.substring(start, end - 1);
                                } else {
                                    subString = attrOfScr.substring(start, end);
                                }

                                //System.out.println("\nthis is the result subString: "+subString);
                                stationLinks2.add(subString);
                            } else {
                                //something's wrong, do not process the link
                                flag = false;
                            }
                            break;//link found                                                    
                        }
                        stationLinks2.add(temp);
                        break;//link found, load next url
                    }
                } //end nested for
                if (flag == false) {//the code has no embed tag
                    stationLinks2.add(sLink);
                }
            }
        }
        num = (float) (theLinks.indexOf(sLink)) / (float) (theLinks.size()) * WEIGHT_IN_COMPUTATION
                + curProgress.getCurProgressPart1();
        curProgress.setCurProgress((int) num);
    } //end outer for
    writeLinksToFile(links2FileName, stationLinks2);
    print("Written %s to file, second links.", stationLinks2.size());
}

From source file:org.aliuge.crawler.extractor.selector.AbstractElementCssSelector.java

/**
 * ??????/*from w w w.  j  a  v  a2  s. c o m*/
 * @param elements
 * @param attr
 * @return
 */
protected String getExtractAttr(Elements elements, String attr) {
    String temp = "";
    if (attr.equalsIgnoreCase("tostring")) {
        return temp = elements.attr(attr).toString();
    } else {
        if (index == -1 && StringUtils.isNotBlank(this.regex)) {
            for (Element e : elements) {
                Element element = e;
                if (element.select(this.regex).size() > 0) {
                    return temp = e.attr(attr);
                }
            }
            return temp;
        } else {
            if (index > -1 && index < elements.size()) {
                return elements.get(index).attr(attr);
            }
        }
        return elements.first().attr(attr);
    }
    /*if(null!=pattern){
       Matcher m = pattern.matcher(temp);
       if(m.find()){
    temp = m.group(1);
       }
    }*/
    //return temp;
}

From source file:org.aliuge.crawler.extractor.selector.action.ActionFactory.java

/**
 * Creates the {@link SelectorAction} described by an action element.
 *
 * <p>The action family is chosen by {@code c}: "string", "integer"/"int", "date" (no
 * actions defined), "numerica", "file"; every other value is handled like a string
 * action with the alternative attribute layout for "replace". The concrete action is
 * chosen by the element's {@code operation} attribute.
 *
 * @param element the action element
 * @param c the type name of the enclosing selector
 * @return the action, or {@code null} when the operation is unknown, not supported for
 *         the type, or could not be built
 */
@SuppressWarnings("unchecked")
public static SelectorAction create(Element element, String c) {
    if ("string".equals(c)) {
        return createStringAction(element, true);
    } else if ("integer".equals(c) || "int".equals(c)) {
        return createIntegerAction(element, false);
    } else if ("date".equals(c)) {
        return null;
    } else if ("numerica".equals(c)) {
        return createIntegerAction(element, true);
    } else if ("file".equals(c)) {
        return createFileAction(element);
    }
    return createStringAction(element, false);
}

/** Prints a stack trace naming the element whose "operation" attribute is not a known operation. */
private static void reportUnknownOperation(Element element) {
    new Exception("?" + element.tagName() + "operation").printStackTrace();
}

/**
 * Builds a string action. With {@code explicitStringType} the "replace" operation reads a
 * comma separated {@code exp} attribute and "regex" is available; otherwise "replace"
 * reads the {@code search}/{@code replacement} attributes and "regex" is not supported.
 */
@SuppressWarnings("unchecked")
private static SelectorAction createStringAction(Element element, boolean explicitStringType) {
    StringActionType type = EnumUtils.getEnum(StringActionType.class, element.attr("operation"));
    if (null == type) {
        // Switching on null would fail; report the problem and create no action.
        reportUnknownOperation(element);
        return null;
    }
    switch (type) {
    case after:
        return new StringAfterAction(element.attr("split"));
    case afterLast:
        return new StringAfterLastAction(element.attr("split"));
    case before:
        return new StringBeforeAction(element.attr("split"));
    case beforeLast:
        return new StringBeforeLastAction(element.attr("split"));
    case between:
        return new StringBetweenAction(element.attr("exp"));
    case filter:
        return new StringFilterAction(element.attr("filter"), element.attr("charType"));
    case replace:
        if (!explicitStringType) {
            return new StringReplaceAction(element.attr("search"), element.attr("replacement"));
        }
        String[] kv = element.attr("exp").split(",");
        if (kv.length == 2) {
            return new StringReplaceAction(kv[0], kv[1]);
        }
        // A malformed exp must not fall through into the "split" operation.
        new IllegalArgumentException("replace expects exp=\"search,replacement\" but got: "
                + element.attr("exp")).printStackTrace();
        return null;
    case split:
        return new StringSplitAction(element.attr("split"), element.attr("index"));
    case sub:
        return new StringSubAction(element.attr("exp"));
    case suffix:
        return new StringSuffixAction(element.attr("suffix"));
    case perfix:
        return new StringPerfixAction(element.attr("perfix"));
    case regex:
        return explicitStringType ? new StringRegexAction(element.attr("exp")) : null;
    default:
        return null;
    }
}

/**
 * Builds an integer action. With {@code swallowUnexpected} (the "numerica" type) every
 * failure while building a "between" action is printed and ignored; otherwise only an
 * invalid between-expression is.
 */
@SuppressWarnings("unchecked")
private static SelectorAction createIntegerAction(Element element, boolean swallowUnexpected) {
    IntegerActionType type = EnumUtils.getEnum(IntegerActionType.class, element.attr("operation"));
    if (null == type) {
        reportUnknownOperation(element);
        return null;
    }
    switch (type) {
    case abs:
        return new IntegerAbsAction();
    case between:
        try {
            return new IntegerBetweenAction(element.attr("exp"), element.attr("default"));
        } catch (IntegerBetweenExpressionException e) {
            e.printStackTrace();
        } catch (RuntimeException e) {
            if (!swallowUnexpected) {
                throw e;
            }
            e.printStackTrace();
        }
        return null;
    default:
        return null;
    }
}

/** Builds a file action ("download" or "download_resize") from the element's attributes. */
@SuppressWarnings("unchecked")
private static SelectorAction createFileAction(Element element) {
    FileActionType type = EnumUtils.getEnum(FileActionType.class, element.attr("operation"));
    if (null == type) {
        reportUnknownOperation(element);
        return null;
    }
    switch (type) {
    case download:
        return new DownLoadFileAction(element.attr("dir"), usesMd5FileName(element), isAsync(element));
    case download_resize:
        DownLoadImageResizeAction resizeAction = new DownLoadImageResizeAction(element.attr("dir"),
                usesMd5FileName(element), isAsync(element));

        // Optional settings: only applied when the attribute is present and non-blank.
        String width = element.attr("width");
        if (StringUtils.isNotBlank(width)) {
            resizeAction.setW(Integer.parseInt(width));
        }
        String height = element.attr("height");
        if (StringUtils.isNotBlank(height)) {
            resizeAction.setH(Integer.parseInt(height));
        }
        String quality = element.attr("quality");
        if (StringUtils.isNotBlank(quality)) {
            resizeAction.setQuality(Float.parseFloat(quality));
        }
        String del = element.attr("del");
        if (StringUtils.isNotBlank(del)) {
            resizeAction.setDeleteOldFile(Boolean.parseBoolean(del));
        }
        return resizeAction;
    default:
        return null;
    }
}

/** True when the "fileName" attribute is blank or equals "{md5}". */
private static boolean usesMd5FileName(Element element) {
    String fileName = element.attr("fileName");
    return !StringUtils.isNotBlank(fileName) || "{md5}".equals(fileName);
}

/** Value of the "asyn" attribute parsed as a boolean; true when the attribute is blank. */
private static boolean isAsync(Element element) {
    String asyn = element.attr("asyn");
    return !StringUtils.isNotBlank(asyn) || Boolean.parseBoolean(asyn);
}

From source file:org.aliuge.crawler.extractor.selector.factory.ElementCssSelectorFactory.java

/**
 * <b>Element??Element??select/*from  www.  j  a v a 2 s .  c om*/
 * @param element
 * @return
 */
@SuppressWarnings("unchecked")
public static AbstractElementCssSelector create(Element element) {
    String name = element.attr("name");
    String value = element.attr("value");
    String type = element.attr("type");

    String attr = element.attr("attr");
    String pattern = element.attr("pattern");
    String regex = element.attr("regex");
    String required = element.attr("required");
    String sIndex = element.attr("index");
    boolean isRequired = false;
    if (StringUtils.isNotBlank(required)) {
        isRequired = Boolean.parseBoolean(required);
    }
    int index = -1;
    if (StringUtils.isNotBlank(sIndex)) {
        index = Integer.parseInt(sIndex) - 1;
    }
    AbstractElementCssSelector selector = ElementCssSelectorFactory.create(name, type, value, attr, isRequired,
            index, regex, pattern);
    // ?
    Elements children = element.children();
    for (Element e : children) {
        if ("action".equals(e.tagName())) {
            SelectorAction action = ActionFactory.create(e, element.attr("type"));
            if (action != null)
                selector.addAction(action);
        }
        // ?Url
        else if ("element".equals(e.tagName())) {
            ((PageElementSelector) selector).addSelector(create(e));
        }
    }
    return selector;
}

From source file:org.aliuge.crawler.extractor.selector.IFConditions.java

/**
 * Builds an {@code IFConditions} from a configuration element.
 *
 * <p>The element's {@code test} attribute becomes the condition expression.
 * Every direct child tagged {@code element} is converted through
 * {@code ElementCssSelectorFactory.create} and added as a selector; other
 * children are ignored.
 *
 * @param element configuration element to read; may be {@code null}
 * @return the populated conditions, or {@code null} when {@code element} is
 *         {@code null}
 */
public static IFConditions create(Element element) {
    if (element == null) {
        return null;
    }
    IFConditions conditions = new IFConditions(element.attr("test"));
    for (Element child : element.children()) {
        if (!"element".equals(child.tagName())) {
            continue;
        }
        conditions.addSelector(ElementCssSelectorFactory.create(child));
    }
    return conditions;
}

From source file:org.aliuge.crawler.jobconf.ExtractConfig.java

/**
 * Populates this configuration from a parsed job configuration document.
 *
 * <p>The job name and index name come from the {@code name} and
 * {@code indexName} attributes of the elements matching {@code job}. The
 * thread count comes from the text of {@code threadNum} under {@code extract}
 * and is left unchanged when that text is blank.
 *
 * <p>For every {@code template} under {@code extract} an
 * {@code ExtractTemplate} is created with:
 * <ul>
 *   <li>one compiled {@link Pattern} per {@code url} element,</li>
 *   <li>the template's {@code name} attribute,</li>
 *   <li>a selector for each {@code element} child and a condition set for
 *       each {@code if} child of the first {@code elements} node.</li>
 * </ul>
 * A template without an {@code elements} node is registered with no
 * selectors or conditions instead of failing with a
 * {@code NullPointerException}.
 *
 * @param doc parsed configuration document
 * @return this instance
 * @throws NumberFormatException if the {@code threadNum} text is not an integer
 * @throws java.util.regex.PatternSyntaxException if a {@code url} text is not
 *         a valid regular expression
 */
public ExtractConfig loadConfig(Document doc) {
    Elements jobElement = doc.select("job");
    super.setJobName(jobElement.attr("name"));
    super.setIndexName(jobElement.attr("indexName"));

    Elements extractElement = doc.select("extract");
    String threadNumText = extractElement.select("threadNum").text();
    if (StringUtils.isNotBlank(threadNumText)) {
        this.threadNum = Integer.parseInt(threadNumText);
    }

    for (Element template : extractElement.select("extract").select("template")) {
        ExtractTemplate extractTemplate = new ExtractTemplate();

        // URL patterns this template applies to.
        List<Pattern> patterns = Lists.newArrayList();
        for (Element urlElement : template.select("url")) {
            patterns.add(Pattern.compile(urlElement.text()));
        }
        extractTemplate.setUrlPattern(patterns);
        extractTemplate.setName(template.attr("name"));

        // first() returns null when the template has no <elements> node;
        // such a template simply contributes no selectors or conditions.
        Element elementsNode = template.select("elements").first();
        if (elementsNode != null) {
            for (Element element : elementsNode.children()) {
                String tag = element.tagName();
                if ("element".equals(tag)) {
                    AbstractElementCssSelector<?> selector = ElementCssSelectorFactory.create(element);
                    extractTemplate.addCssSelector(selector);
                } else if ("if".equals(tag)) {
                    IFConditions ifConditions = IFConditions.create(element);
                    extractTemplate.addConditions(ifConditions);
                }
            }
        }

        // NOTE(review): this registration runs once per template and never
        // runs when the document has no templates. Kept inside the loop to
        // preserve existing behavior; confirm whether it should be hoisted.
        super.setExtractConfig(this);
        this.templates.add(extractTemplate);
    }
    return this;
}