Example usage for org.jsoup.nodes Element html

List of usage examples for org.jsoup.nodes Element html

Introduction

On this page you can find example usages of the org.jsoup.nodes Element html method.

Prototype

public String html() 

Source Link

Documentation

Retrieves the element's inner HTML.

Usage

From source file:net.pixomania.crawler.W3C.parser.rules.editors.version.VersionEditorRule1.java

/**
 * Collects the editors listed under the "version 1" headings of a specification page.
 *
 * <p>Selects every {@code dt} containing "version 1" together with the {@code dd} siblings
 * following it. The text of each selected {@code dt} becomes the version label of the people
 * parsed from the {@code dd} elements after it. A {@code dd} holds either a single person or
 * several people separated by {@code <br>} tags.
 *
 * @param url address of the specification, used only in log messages
 * @param doc parsed specification page
 * @return the parsed editors, or {@code null} when nothing matched or nothing could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements editors = doc.select("dt:contains(version 1), dt:contains(version 1) ~ dd");
    if (editors.isEmpty())
        return null;

    boolean skip = false;
    String version = "";
    for (Element editor : editors) {
        Element prev = editor.previousElementSibling();
        if (prev != null) {
            // A preceding dt that is not a "version 1" heading starts a section to skip.
            if (prev.tagName().equals("dt") && !startsWithVersionOneLabel(prev)) {
                skip = true;
            }

            if (skip) {
                // Skipping stops once the following sibling is a "version 1" heading again;
                // the current element itself is still skipped.
                Element next = editor.nextElementSibling();
                if (next != null && startsWithVersionOneLabel(next)) {
                    skip = false;
                }
                continue;
            }
        }

        if (editor.tagName().equals("dt")) {
            version = editor.text();
            continue;
        }

        String[] splitted = editor.html().split("<br />|<br clear=\"none\" />");

        if (splitted.length < 2) {
            // Single entry: the whole dd describes one person.
            String text = editor.text();
            if (isNonEditorNotice(text.toLowerCase())) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (text.equals("WHATWG:") || text.equals("W3C:"))
                continue;
            Person result = NameParser.parse(text);
            if (result == null)
                continue;

            result.setVersion(version);
            attachLinks(editor, result);
            editorList.add(result);
        } else {
            // Several entries separated by line breaks: parse each fragment on its own.
            for (String split : splitted) {
                if (split.isEmpty())
                    continue;
                if (isNonEditorNotice(split.toLowerCase())) {
                    Log.log("warning", "Spec " + url + " may refer to a different section!");
                    continue;
                }
                if (split.equals("WHATWG:") || split.equals("W3C:"))
                    continue;
                Document newdoc = Jsoup.parse(split.replaceAll("\n", ""));
                Person result = NameParser.parse(newdoc.text());
                if (result == null)
                    continue;

                result.setVersion(version);
                attachLinks(newdoc, result);
                editorList.add(result);
            }
        }

        // Stop at the next heading unless it continues the "editors (version 1" list.
        Element next = editor.nextElementSibling();
        if (next != null && next.tag().getName().equals("dt")
                && !next.text().trim().toLowerCase().startsWith("editors (version 1"))
            break;
    }

    if (editorList.isEmpty())
        return null;

    return editorList;
}

/** Tells whether the element's trimmed, lower-cased text starts with a "version 1" label. */
private static boolean startsWithVersionOneLabel(Element element) {
    String label = element.text().trim().toLowerCase();
    return label.startsWith("version 1") || label.startsWith("editors (version 1");
}

/** Tells whether already lower-cased text is one of the known remarks that are not a person. */
private static boolean isNonEditorNotice(String lowerCased) {
    return lowerCased.startsWith("(in alphabetic")
            || lowerCased.startsWith("see acknowl")
            || lowerCased.startsWith("the w3")
            || lowerCased.startsWith("(see ac")
            || lowerCased.startsWith("see participants")
            || lowerCased.contains("note:");
}

/** Copies every non-empty anchor href below {@code source} to the person as e-mail or website. */
private static void attachLinks(Element source, Person person) {
    // One selection for all anchors instead of re-selecting on every access.
    for (Element anchor : source.select("a")) {
        String href = anchor.attr("href");
        if (href.isEmpty())
            continue;
        if (href.contains("@")) {
            person.setEmail(href.replace("mailto:", ""));
        } else {
            person.addWebsite(href);
        }
    }
}

From source file:be.ibridge.kettle.jsoup.JsoupInput.java

/**
 * Builds the output row for the record at {@code data.recordnr} and then advances that counter.
 *
 * <p>For every configured input field the value is read from the matching jsoup element
 * (text, outer HTML, inner HTML or an attribute), trimmed as configured, converted to the
 * target value type and stored at {@code data.totalpreviousfields + i}. Optional file
 * metadata columns are appended afterwards, starting at index {@code data.nrInputFields}.
 * The finished row is also remembered in {@code data.previousRow} for repeated fields.
 *
 * @return the populated output row
 * @throws KettleException presumably raised by the value conversion; verify against
 *                         {@code ValueMetaInterface.convertData}
 */
private Object[] buildRow() throws KettleException {
    // Create new row...
    Object[] outputRowData = buildEmptyRow();

    // ...but continue from a copy of the incoming row when there is one.
    if (data.readrow != null)
        outputRowData = data.readrow.clone();

    // Read fields...
    for (int i = 0; i < data.nrInputFields; i++) {
        JsoupInputField field = meta.getInputFields()[i];
        // Position of this field's value in the output row.
        int targetIndex = data.totalpreviousfields + i;

        // Elements selected for this field; the current record picks one of them.
        Elements jsoupa = data.resultList.get(i);
        String nodevalue = null;
        if (jsoupa != null) {
            Element jo = jsoupa.get(data.recordnr);
            if (jo != null) {

                // Do Element Type
                switch (field.getElementType()) {
                case JsoupInputField.ELEMENT_TYPE_NODE:
                    // Do Result Type
                    switch (field.getResultType()) {
                    case JsoupInputField.RESULT_TYPE_TEXT:
                        nodevalue = jo.text();
                        break;
                    case JsoupInputField.RESULT_TYPE_TYPE_OUTER_HTML:
                        nodevalue = jo.outerHtml();
                        break;
                    case JsoupInputField.RESULT_TYPE_TYPE_INNER_HTML:
                        nodevalue = jo.html();
                        break;
                    default:
                        nodevalue = jo.toString();
                        break;
                    }
                    break;
                case JsoupInputField.ELEMENT_TYPE_ATTRIBUT:
                    nodevalue = jo.attr(field.getAttribute());
                    break;
                default:
                    nodevalue = jo.toString();
                    break;
                }
            }
        }

        // Do trimming
        switch (field.getTrimType()) {
        case JsoupInputField.TYPE_TRIM_LEFT:
            nodevalue = Const.ltrim(nodevalue);
            break;
        case JsoupInputField.TYPE_TRIM_RIGHT:
            nodevalue = Const.rtrim(nodevalue);
            break;
        case JsoupInputField.TYPE_TRIM_BOTH:
            nodevalue = Const.trim(nodevalue);
            break;
        default:
            break;
        }

        if (meta.isInFields()) {
            // Add result field to input stream
            outputRowData = RowDataUtil.addValueData(outputRowData, targetIndex, nodevalue);
        }

        // Do conversions
        ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta(targetIndex);
        ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(targetIndex);
        outputRowData[targetIndex] = targetValueMeta.convertData(sourceValueMeta, nodevalue);

        // A repeated field with an empty value takes over the value of the previous row.
        if (field.isRepeated() && data.previousRow != null && Const.isEmpty(nodevalue)) {
            outputRowData[targetIndex] = data.previousRow[targetIndex];
        }
    } // End of loop over fields...

    int rowIndex = data.nrInputFields;

    // See if we need to add the filename to the row...
    if (meta.includeFilename() && !Const.isEmpty(meta.getFilenameField())) {
        outputRowData[rowIndex++] = data.filename;
    }
    // See if we need to add the row number to the row...
    if (meta.includeRowNumber() && !Const.isEmpty(meta.getRowNumberField())) {
        outputRowData[rowIndex++] = Long.valueOf(data.rownr);
    }
    // Possibly add short filename...
    if (meta.getShortFileNameField() != null && meta.getShortFileNameField().length() > 0) {
        outputRowData[rowIndex++] = data.shortFilename;
    }
    // Add Extension
    if (meta.getExtensionField() != null && meta.getExtensionField().length() > 0) {
        outputRowData[rowIndex++] = data.extension;
    }
    // add path
    if (meta.getPathField() != null && meta.getPathField().length() > 0) {
        outputRowData[rowIndex++] = data.path;
    }
    // Add Size
    if (meta.getSizeField() != null && meta.getSizeField().length() > 0) {
        outputRowData[rowIndex++] = Long.valueOf(data.size);
    }
    // add Hidden
    if (meta.isHiddenField() != null && meta.isHiddenField().length() > 0) {
        // NOTE(review): the hidden flag is derived from data.path, same as the path column
        // above - this looks like a copy/paste slip; confirm which data field holds the flag.
        outputRowData[rowIndex++] = Boolean.valueOf(data.path);
    }
    // Add modification date
    if (meta.getLastModificationDateField() != null && meta.getLastModificationDateField().length() > 0) {
        outputRowData[rowIndex++] = data.lastModificationDateTime;
    }
    // Add Uri
    if (meta.getUriField() != null && meta.getUriField().length() > 0) {
        outputRowData[rowIndex++] = data.uriName;
    }
    // Add RootUri
    if (meta.getRootUriField() != null && meta.getRootUriField().length() > 0) {
        outputRowData[rowIndex++] = data.rootUriName;
    }
    data.recordnr++;

    RowMetaInterface irow = getInputRowMeta();

    // Keep a copy when input metadata is available so that a later change of the returned
    // row does not alter the remembered previous row.
    data.previousRow = irow == null ? outputRowData : (Object[]) irow.cloneRow(outputRowData);

    return outputRowData;
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule2.java

/**
 * Collects the editors listed in {@code dd} elements that follow an "Editor" or
 * "Edition Editor" {@code dt} heading.
 *
 * <p>Entries below headings that mention a version or end in "draft:" are skipped. When an
 * entry contains more than two " - " separators the whole page is handed to
 * {@link EditorsRule5} instead. A {@code dd} holds either a single person or several people
 * separated by {@code <br>} tags.
 *
 * @param url address of the specification, used in log messages and when delegating
 * @param doc parsed specification page
 * @return the parsed editors, the result of {@code EditorsRule5} when delegating, or
 *         {@code null} when nothing matched or nothing could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements editors = doc.select("dt:contains(Editor) ~ dd, dt:contains(Edition Editor) ~ dd");
    if (editors.isEmpty())
        return null;

    boolean skip = false;
    for (Element editor : editors) {
        // Every selected dd follows a dt sibling, so a previous element sibling exists.
        Element prev = editor.previousElementSibling();
        if (prev.tagName().equals("dt")) {
            String heading = prev.text().trim().toLowerCase();
            if ((!heading.startsWith("editor") && !heading.startsWith("edition editor"))
                    || heading.contains("version")
                    || heading.endsWith("draft:")) {
                skip = true;
            }
        }

        if (skip) {
            // Skipping stops once the following sibling looks like an editor heading again;
            // the current element itself is still skipped.
            Element next = editor.nextElementSibling();
            if (next != null) {
                String following = next.text().trim().toLowerCase();
                if (following.startsWith("editor") || following.contains("edition editor")) {
                    skip = false;
                }
            }
            continue;
        }

        String text = editor.text();
        if (StringUtils.countMatches(text, " - ") > 2) {
            Log.log("warning", "This editor may be a list of editors separated by  - ");
            EditorsRule5 ed5 = new EditorsRule5();

            return ed5.run(url, doc);
        }

        String[] splitted = editor.html().split("<br />|<br clear=\"none\" />");

        if (splitted.length < 2) {
            // Single entry: the whole dd describes one person.
            if (isNonEditorNotice(text.toLowerCase())) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (text.equals("WHATWG:") || text.equals("W3C:"))
                continue;
            Person result = NameParser.parse(text);
            if (result == null)
                continue;

            attachLinks(editor, result);
            editorList.add(result);
        } else {
            // Several entries separated by line breaks: parse each fragment on its own.
            for (String split : splitted) {
                if (split.isEmpty())
                    continue;
                if (isNonEditorNotice(split.toLowerCase())) {
                    Log.log("warning", "Spec " + url + " may refer to a different section!");
                    continue;
                }
                if (split.equals("WHATWG:") || split.equals("W3C:"))
                    continue;
                Document newdoc = Jsoup.parse(split.replaceAll("\n", ""));
                Person result = NameParser.parse(newdoc.text());
                if (result == null)
                    continue;

                attachLinks(newdoc, result);
                editorList.add(result);
            }
        }

        // The next heading ends the editor list.
        Element next = editor.nextElementSibling();
        if (next != null && next.tag().getName().equals("dt"))
            break;
    }

    if (editorList.isEmpty())
        return null;

    return editorList;
}

/** Tells whether already lower-cased text is one of the known remarks that are not a person. */
private static boolean isNonEditorNotice(String lowerCased) {
    return lowerCased.startsWith("(in alphabetic")
            || lowerCased.startsWith("see acknowl")
            || lowerCased.startsWith("the w3")
            || lowerCased.startsWith("(see ac")
            || lowerCased.startsWith("see participants")
            || lowerCased.contains("note:");
}

/** Copies every non-empty anchor href below {@code source} to the person as e-mail or website. */
private static void attachLinks(Element source, Person person) {
    // One selection for all anchors instead of re-selecting on every access.
    for (Element anchor : source.select("a")) {
        String href = anchor.attr("href");
        if (href.isEmpty())
            continue;
        if (href.contains("@")) {
            person.setEmail(href.replace("mailto:", ""));
        } else {
            person.addWebsite(href);
        }
    }
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule8.java

/**
 * Collects the editors listed in {@code blockquote} elements that follow an "Editor"
 * {@code h4} heading.
 *
 * <p>Entries below headings that end in "version:" or "draft:" are skipped. When an entry
 * contains more than two " - " separators the whole page is handed to {@link EditorsRule5}
 * instead. A {@code blockquote} holds either a single person or several people separated by
 * {@code <br>} tags.
 *
 * @param url address of the specification, used in log messages and when delegating
 * @param doc parsed specification page
 * @return the parsed editors, the result of {@code EditorsRule5} when delegating, or
 *         {@code null} when nothing matched or nothing could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements editors = doc.select("h4:contains(Editor) ~ blockquote");
    if (editors.isEmpty())
        return null;

    boolean skip = false;
    for (Element editor : editors) {
        // Every selected blockquote follows an h4 sibling, so a previous element sibling exists.
        Element prev = editor.previousElementSibling();
        if (prev.tagName().equals("h4")) {
            String heading = prev.text().trim().toLowerCase();
            if ((!heading.startsWith("editor") && !heading.startsWith("edition editor"))
                    || heading.endsWith("version:")
                    || heading.endsWith("draft:")) {
                skip = true;
            }
        }

        if (skip) {
            // Skipping stops once the following sibling looks like an editor heading again;
            // the current element itself is still skipped.
            Element next = editor.nextElementSibling();
            if (next != null) {
                String following = next.text().trim().toLowerCase();
                if (following.startsWith("editor") || following.contains("edition editor")) {
                    skip = false;
                }
            }
            continue;
        }

        String text = editor.text();
        if (StringUtils.countMatches(text, " - ") > 2) {
            Log.log("warning", "This editor may be a list of editors separated by  - ");
            EditorsRule5 ed5 = new EditorsRule5();

            return ed5.run(url, doc);
        }

        String[] splitted = editor.html().split("<br />|<br clear=\"none\" />");

        if (splitted.length < 2) {
            // Single entry: the whole blockquote describes one person.
            if (isNonEditorNotice(text.toLowerCase())) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (text.equals("WHATWG:") || text.equals("W3C:"))
                continue;
            Person result = NameParser.parse(text);
            if (result == null)
                continue;

            attachLinks(editor, result);
            editorList.add(result);
        } else {
            // Several entries separated by line breaks: parse each fragment on its own.
            for (String split : splitted) {
                if (split.isEmpty())
                    continue;
                if (isNonEditorNotice(split.toLowerCase())) {
                    Log.log("warning", "Spec " + url + " may refer to a different section!");
                    continue;
                }
                if (split.equals("WHATWG:") || split.equals("W3C:"))
                    continue;
                Document newdoc = Jsoup.parse(split.replaceAll("\n", ""));
                Person result = NameParser.parse(newdoc.text());
                if (result == null)
                    continue;

                attachLinks(newdoc, result);
                editorList.add(result);
            }
        }

        // The next heading ends the editor list.
        Element next = editor.nextElementSibling();
        if (next != null && next.tag().getName().equals("h4"))
            break;
    }

    if (editorList.isEmpty())
        return null;

    return editorList;
}

/** Tells whether already lower-cased text is one of the known remarks that are not a person. */
private static boolean isNonEditorNotice(String lowerCased) {
    return lowerCased.startsWith("(in alphabetic")
            || lowerCased.startsWith("see acknowl")
            || lowerCased.startsWith("the w3")
            || lowerCased.startsWith("(see ac")
            || lowerCased.startsWith("see participants")
            || lowerCased.contains("note:");
}

/** Copies every non-empty anchor href below {@code source} to the person as e-mail or website. */
private static void attachLinks(Element source, Person person) {
    // One selection for all anchors instead of re-selecting on every access.
    for (Element anchor : source.select("a")) {
        String href = anchor.attr("href");
        if (href.isEmpty())
            continue;
        if (href.contains("@")) {
            person.setEmail(href.replace("mailto:", ""));
        } else {
            person.addWebsite(href);
        }
    }
}

From source file:net.kevxu.purdueassist.course.ScheduleDetail.java

/**
 * Parses a class detail page into a {@link ScheduleDetailEntry} for the current term and CRN.
 *
 * <p>For every detail table found, the basic info is read from the first {@code ddlabel}
 * element and the detailed info from the first {@code dddefault} element. The seating table
 * inside the detailed info is parsed first and then removed from the DOM, so that the
 * remaining info is taken from the HTML without it.
 *
 * @param document parsed detail page
 * @return the populated entry
 * @throws HtmlParseException      if an expected element is missing or has an unexpected size
 * @throws CourseNotFoundException if the page states that no detailed class information exists
 * @throws ResultNotMatchException not raised here directly; presumably by one of the setters
 */
private ScheduleDetailEntry parseDocument(Document document)
        throws HtmlParseException, CourseNotFoundException, ResultNotMatchException {
    ScheduleDetailEntry entry = new ScheduleDetailEntry(term, crn);
    Elements tableElements = document.getElementsByAttributeValue("summary",
            "This table is used to present the detailed class information.");

    if (tableElements.isEmpty()) {
        // No detail table: distinguish "course does not exist" from an unparsable page.
        Elements informationElements = document.getElementsByAttributeValue("summary",
                "This layout table holds message information");
        if (!informationElements.isEmpty()
                && informationElements.text().contains("No detailed class information found")) {
            throw new CourseNotFoundException(informationElements.text());
        }
        throw new HtmlParseException(
                "Course table not found, but page does not contain message stating no course found.");
    }

    for (Element tableElement : tableElements) {
        // get basic info for selected course
        Element tableBasicInfoElement = tableElement.getElementsByClass("ddlabel").first();
        if (tableBasicInfoElement == null) {
            throw new HtmlParseException("Basic info element empty.");
        }
        setBasicInfo(entry, tableBasicInfoElement.text());

        // get detailed course info
        Element tableDetailedInfoElement = tableElement.getElementsByClass("dddefault").first();
        if (tableDetailedInfoElement == null) {
            throw new HtmlParseException("Detailed info element empty.");
        }

        // process seat info
        Elements tableSeatDetailElements = tableDetailedInfoElement.getElementsByAttributeValue(
                "summary", "This layout table is used to present the seating numbers.");
        if (tableSeatDetailElements.size() != 1) {
            throw new HtmlParseException(
                    "Seat detail elements size not 1. We have " + tableSeatDetailElements.size() + ".");
        }

        // A seat table without a tbody is reported as a parse error, not a NullPointerException.
        Element seatTableBody = tableSeatDetailElements.first().getElementsByTag("tbody").first();
        if (seatTableBody == null) {
            throw new HtmlParseException("Seat detail table has no tbody element.");
        }

        // Row 0 is not read; rows 1-3 hold seats, waitlist seats and (optionally) cross-list seats.
        Elements tableSeatDetailEntryElements = seatTableBody.children();
        int rowCount = tableSeatDetailEntryElements.size();
        if (rowCount != 3 && rowCount != 4) {
            throw new HtmlParseException(
                    "Seat detail entry elements size not 3 or 4. We have " + rowCount + ".");
        }
        setSeats(entry, tableSeatDetailEntryElements.get(1).text());
        setWaitlistSeats(entry, tableSeatDetailEntryElements.get(2).text());
        if (rowCount == 4) {
            setCrosslistSeats(entry, tableSeatDetailEntryElements.get(3).text());
        }

        // remove the seat info from detailed info
        tableSeatDetailElements.remove();

        // remaining information
        setRemainingInfo(entry, tableDetailedInfoElement.html());
    }

    return entry;
}

From source file:de.geeksfactory.opacclient.apis.Open.java

/**
 * Parses one page of search results into a {@link SearchRequestResult}.
 *
 * <p>The document is stored in {@code searchResultDoc}. If the page carries an info message,
 * "keine Treffer" yields an empty result and any other message is raised as an error.
 * Otherwise every result container is turned into a {@link SearchResult} holding cover,
 * media type, a short HTML description, the media id and its availability status. The
 * availability is fetched with one HTTP POST per result that has an id; failures of that
 * request are printed and leave the status unset.
 *
 * @param doc  parsed search result page
 * @param page page number passed through to the returned result
 * @return the parsed results together with the total hit count read from the page
 * @throws OpacErrorException if the page shows an info message other than "keine Treffer"
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException {
    searchResultDoc = doc;

    Elements infoMessages = doc.select("#Label1, span[id$=LblInfoMessage]");
    if (infoMessages.size() > 0) {
        String message = infoMessages.text();
        if (message.contains("keine Treffer")) {
            return new SearchRequestResult(new ArrayList<SearchResult>(), 0, 1, page);
        } else {
            throw new OpacErrorException(message);
        }
    }

    int totalCount = Integer.parseInt(doc.select("span[id$=TotalItemsLabel]").first().text());

    // Compiled once here instead of once per result / per script tag.
    Pattern idPattern = Pattern.compile("\\$mdv(\\d+)\\$");
    Pattern portalIdPattern = Pattern
            .compile(".*LoadSharedCatalogueViewAvailabilityAsync\\([^,]*,[^,]*,"
                    + "[^0-9,]*([0-9]+)[^0-9,]*,.*\\).*");

    Elements elements = doc.select("div[id$=divMedium], div[id$=divComprehensiveItem]");
    List<SearchResult> results = new ArrayList<>();
    int i = 0;
    for (Element element : elements) {
        SearchResult result = new SearchResult();
        // Cover
        Elements mediumImage = element.select("input[id$=mediumImage]");
        if (mediumImage.size() > 0) {
            result.setCover(mediumImage.first().attr("src"));
        } else {
            Elements coverImage = element.select("img[id$=CoverView_Image]");
            if (coverImage.size() > 0) {
                result.setCover(getCoverUrl(coverImage.first()));
            }
        }

        Element catalogueContent = element.select(".catalogueContent").first();
        // Media Type
        Elements mediaIcon = catalogueContent.select("#spanMediaGrpIcon");
        if (mediaIcon.size() > 0) {
            String mediatype = mediaIcon.attr("class");
            if (mediatype.startsWith("itemtype ")) {
                mediatype = mediatype.substring("itemtype ".length());
            }

            SearchResult.MediaType defaulttype = defaulttypes.get(mediatype);
            if (defaulttype == null)
                defaulttype = SearchResult.MediaType.UNKNOWN;

            // A library-specific mapping takes precedence over the built-in default.
            if (data.has("mediatypes")) {
                try {
                    result.setType(SearchResult.MediaType
                            .valueOf(data.getJSONObject("mediatypes").getString(mediatype)));
                } catch (JSONException e) {
                    result.setType(defaulttype);
                }
            } else {
                result.setType(defaulttype);
            }
        } else {
            result.setType(SearchResult.MediaType.UNKNOWN);
        }

        // Text
        String title = catalogueContent.select("a[id$=LbtnShortDescriptionValue], a[id$=LbtnTitleValue]")
                .text();
        String subtitle = catalogueContent.select("span[id$=LblSubTitleValue]").text();
        String author = catalogueContent.select("span[id$=LblAuthorValue]").text();
        String year = catalogueContent.select("span[id$=LblProductionYearValue]").text();
        String publisher = catalogueContent
                .select("span[id$=LblManufacturerValue], span[id$=LblPublisherValue]").text();
        String series = catalogueContent.select("span[id$=LblSeriesValue]").text();

        StringBuilder text = new StringBuilder();
        text.append("<b>").append(title).append("</b>");
        if (!subtitle.equals(""))
            text.append("<br/>").append(subtitle);
        if (!author.equals(""))
            text.append("<br/>").append(author);
        if (!year.equals(""))
            text.append("<br/>").append(year);
        if (!publisher.equals(""))
            text.append("<br/>").append(publisher);
        if (!series.equals(""))
            text.append("<br/>").append(series);

        result.setInnerhtml(text.toString());

        // ID
        Matcher matcher = idPattern.matcher(catalogueContent.html());
        if (matcher.find()) {
            result.setId(matcher.group(1));
        }

        // Availability
        if (result.getId() != null) {
            String url = opac_url + "/DesktopModules/OCLC.OPEN.PL.DNN.SearchModule/SearchService"
                    + ".asmx/GetAvailability";
            String culture = element.select("input[name$=culture]").val();
            // Named differently from the "data" field read above to avoid shadowing it.
            JSONObject request = new JSONObject();
            try {
                // Determine portalID value; 1 is used when no script tag provides one.
                int portalId = 1;
                for (Element scripttag : doc.select("script")) {
                    String scr = scripttag.html();
                    if (scr.contains("LoadSharedCatalogueViewAvailabilityAsync")) {
                        Matcher portalIdMatcher = portalIdPattern.matcher(scr);
                        if (portalIdMatcher.find()) {
                            portalId = Integer.parseInt(portalIdMatcher.group(1));
                        }
                    }
                }

                request.put("portalId", portalId).put("mednr", result.getId()).put("culture", culture)
                        .put("requestCopyData", false).put("branchFilter", "");
                StringEntity entity = new StringEntity(request.toString());
                entity.setContentType(ContentType.APPLICATION_JSON.getMimeType());
                String json = httpPost(url, entity, getDefaultEncoding());
                JSONObject availabilityData = new JSONObject(json);
                String isAvail = availabilityData.getJSONObject("d").getString("IsAvail");
                switch (isAvail) {
                case "true":
                    result.setStatus(SearchResult.Status.GREEN);
                    break;
                case "false":
                    result.setStatus(SearchResult.Status.RED);
                    break;
                case "digital":
                    result.setStatus(SearchResult.Status.UNKNOWN);
                    break;
                }

            } catch (JSONException | IOException e) {
                e.printStackTrace();
            }
        }

        // Position of the result on this page; the counter has to advance per result.
        result.setNr(i);
        i++;
        results.add(result);
    }
    return new SearchRequestResult(results, totalCount, page);
}

From source file:com.storm.function.GsxtFunction.java

/**
 * Collects the raw markup of every detail section published for one company on the Liaoning
 * registry site ({@code gsxt.lngs.gov.cn}), starting from an already loaded search result page.
 *
 * <p>The returned map always contains a {@code "statusCodeDef"} entry:
 * <ul>
 * <li>{@code FAILURE} when {@code firstInfoPage} is null;</li>
 * <li>{@code IMAGECODE_ERROR} when the result list is empty and the {@code list-a} div is missing
 * or does not contain the marker text;</li>
 * <li>{@code NO_DATA_FOUND} when the {@code list-a} div contains the marker text, or when no
 * result link text contains {@code keyword};</li>
 * <li>{@code SCCCESS} after every section has been fetched.</li>
 * </ul>
 * On success the map additionally holds one entry per section, in fetch order.
 *
 * @param area          label that is only written to the log
 * @param firstInfoPage search result page, may be null
 * @param keyword       text the result link must contain to be selected
 * @param LOGGER        channel logger; {@code returnRedisResource()} is called on it before a
 *                      normal return (not when an exception propagates)
 * @return insertion-ordered map of section key to markup (or nested lists/maps of markup)
 * @throws Exception whatever page loading, clicking, URL parsing or the sleep throws
 */
private Map<String, Object> getHtmlInfoMapOfLiaoning(String area, HtmlPage firstInfoPage, String keyword,
        ChannelLogger LOGGER) throws Exception {

    LOGGER.info("=========" + area + "=========" + keyword + "=========");

    Map<String, Object> resultHtmlMap = new LinkedHashMap<String, Object>();

    if (null == firstInfoPage) {

        resultHtmlMap.put("statusCodeDef", StatusCodeDef.FAILURE);

    } else {

        // Grab the current window up front; it is passed to the windowed getPage calls below.
        WebWindow webWindow = firstInfoPage.getWebClient().getCurrentWindow();

        final String HOST_OF_LIAONING = "http://gsxt.lngs.gov.cn";

        @SuppressWarnings("unchecked")
        List<HtmlAnchor> anchors = (List<HtmlAnchor>) firstInfoPage
                .getByXPath("//div[@id='listContent']/div/ul/li/a");
        HtmlElement div_none = firstInfoPage.getFirstByXPath("//div[@class='list-a']");

        // First result link whose text contains the keyword; stays null when there is none.
        HtmlAnchor htmlAnchor = null;

        if (null == anchors || anchors.isEmpty()) {
            if (null != div_none && div_none.asXml().contains("??")) {
                resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
            } else {
                resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR);
            }
        } else {
            for (HtmlAnchor anchor : anchors) {
                String anchorTitle = anchor.getTextContent().trim();
                if (anchorTitle.contains(keyword)) {
                    htmlAnchor = anchor;
                    break;
                }
            }
            if (null == htmlAnchor) {
                resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
                LOGGER.info("????");
            }
        }

        if (null != htmlAnchor) {

            // Request parameters shared by every detail URL; taken from the link's onclick handler.
            String pripid = "";
            String type = "";

            // The list item that wraps the selected link (two levels up from the anchor).
            HtmlElement target_item_info = (HtmlElement) htmlAnchor.getParentNode().getParentNode();
            resultHtmlMap.put("target_item_info", target_item_info.asXml());
            String liaoning_onclick = htmlAnchor.getAttribute("onclick");
            if (!StringUtils.isEmpty(liaoning_onclick)) {
                // The 3rd and 2nd comma-separated pieces carry pripid and type, wrapped in quotes.
                String[] onclickArgs = liaoning_onclick.split(",");
                pripid = onclickArgs[2].replace("'", "");
                type = onclickArgs[1].replace("'", "");
            }

            // Open the detail page, then wait a fixed 3 s before reading its markup.
            HtmlPage gsgsxx = htmlAnchor.click();
            Thread.sleep(3000);
            Document gsgsxx_dm = Jsoup.parseBodyFragment(gsgsxx.asXml());
            Element s_gs_dj_1 = gsgsxx_dm.getElementById("s_gs_dj_1");
            resultHtmlMap.put("gsgsxx_djxx_jbxx", null != s_gs_dj_1 ? s_gs_dj_1.html() : null);

            // Click every link in the 'tzr_itemContainer' table and keep each resulting page.
            @SuppressWarnings("unchecked")
            List<HtmlAnchor> touziren_anchors = (List<HtmlAnchor>) gsgsxx
                    .getByXPath("//tbody[@id='tzr_itemContainer']/tr/td/a");
            if (null != touziren_anchors && !touziren_anchors.isEmpty()) {
                List<Map<String, Object>> gsgsxx_djxx_tzrxx_xqs = new ArrayList<Map<String, Object>>();
                for (HtmlAnchor touziren_anchor : touziren_anchors) {
                    Map<String, Object> gsgsxx_djxx_tzrxx_xq = new LinkedHashMap<String, Object>();
                    HtmlPage gsgsxx_djxx_tzrxx_xq_page = touziren_anchor.click();
                    gsgsxx_djxx_tzrxx_xq.put("gsgsxx_djxx_tzrxx_xq", gsgsxx_djxx_tzrxx_xq_page.asXml());
                    gsgsxx_djxx_tzrxx_xqs.add(gsgsxx_djxx_tzrxx_xq);
                }
                resultHtmlMap.put("gsgsxx_djxx_tzrxx_xqs", gsgsxx_djxx_tzrxx_xqs);
            }

            WebClient wc = firstInfoPage.getWebClient();

            // The next four sections are fetched with JavaScript switched off. The finally block
            // guarantees the shared client gets JavaScript back even when one of the requests throws.
            wc.getOptions().setJavaScriptEnabled(false);
            try {
                resultHtmlMap.put("gsgsxx_djxx_tzrxx",
                        liaoningPageXml(wc, liaoningActionUrl("getTzrxxAction", pripid, type)));
                resultHtmlMap.put("gsgsxx_djxx_bgxx",
                        liaoningPageXml(wc, webWindow, liaoningActionUrl("getBgxxAction", pripid, type)));
                resultHtmlMap.put("gsgsxx_baxx_zyryxx",
                        liaoningPageXml(wc, webWindow, liaoningActionUrl("getZyryxxAction", pripid, type)));
                // NOTE(review): this section requests getTzrxxAction, the same action already used
                // for "gsgsxx_djxx_tzrxx" above. Looks like a copy-paste; confirm the intended action.
                resultHtmlMap.put("gsgsxx_baxx_zgbmxx",
                        liaoningPageXml(wc, webWindow, liaoningActionUrl("getTzrxxAction", pripid, type)));
            } finally {
                wc.getOptions().setJavaScriptEnabled(true);
            }

            // {result key, action name} pairs fetched with JavaScript enabled, in this order.
            String[][] gsgsxxSections = {
                    { "gsgsxx_baxx_fzjgxx", "getFgsxxAction" },
                    { "gsgsxx_baxx_qsxx", "getQsxxAction" },
                    { "gsgsxx_dcdydjxx_dcdydjxx", "getDcdydjAction" },
                    { "gsgsxx_gqczdjxx_gqczdjxx", "getGsgsGqczxxAction" },
                    { "gsgsxx_xzcfxx_xzcfxx", "getXzcfxxAction" },
                    { "gsgsxx_jyycxx_jyycxx", "getJyycxxAction" },
                    { "gsgsxx_yzwfxx_yzwfxx", "getYzwfxxAction" },
                    { "gsgsxx_ccjcxx_ccjcxx", "getCcjcxxAction" } };
            for (String[] section : gsgsxxSections) {
                resultHtmlMap.put(section[0],
                        liaoningPageXml(wc, liaoningActionUrl(section[1], pripid, type)));
            }

            // Annual-report list page: stored as a whole, then each linked report is fetched too.
            HtmlPage qygsxx_qynb_list_page = wc.getPage(liaoningActionUrl("getQygsQynbxxAction", pripid, type));
            resultHtmlMap.put("qygsxx_qynb_list_page", qygsxx_qynb_list_page.asXml());

            @SuppressWarnings("unchecked")
            List<HtmlElement> qygsxx_qynb_list_as = (List<HtmlElement>) qygsxx_qynb_list_page
                    .getByXPath("//tbody[@id='qynbItemContainer']/tr/td[2]/a");
            List<Map<String, Object>> qygsxx_qynb_infos = new ArrayList<Map<String, Object>>();
            if (qygsxx_qynb_list_as != null && !qygsxx_qynb_list_as.isEmpty()) {
                for (HtmlElement qygsxx_qynb_list_a : qygsxx_qynb_list_as) {
                    Map<String, Object> qygsxx_qynb_info_map = new LinkedHashMap<String, Object>();
                    // The href is appended to the host as-is to form the report URL.
                    String qygsxx_qynb_list_a_href = HOST_OF_LIAONING + qygsxx_qynb_list_a.getAttribute("href");
                    String qygsxx_qynb_list_a_text = qygsxx_qynb_list_a.getTextContent();
                    qygsxx_qynb_info_map.put("qygsxx_qynb_list_a_href", qygsxx_qynb_list_a_href);
                    qygsxx_qynb_info_map.put("qygsxx_qynb_list_a_text", qygsxx_qynb_list_a_text);
                    qygsxx_qynb_info_map.put("qygsxx_qynb_info_page",
                            liaoningPageXml(wc, qygsxx_qynb_list_a_href));
                    qygsxx_qynb_infos.add(qygsxx_qynb_info_map);
                }
            }
            resultHtmlMap.put("qygsxx_qynb_infos", qygsxx_qynb_infos);

            // Remaining {result key, action name} pairs. Each value is null-checked against its
            // own page (the original checked the wrong page for "qygsxx_gdjczxx_bgxx").
            String[][] qygsxxSections = {
                    { "qygsxx_gdjczxx", "getQygsJsGdjczxxAction" },
                    { "qygsxx_gdjczxx_bgxx", "getQygsJsGdjczbgxxAction" },
                    { "qygsxx_gqbgxx", "getQygsJsGqbgxxAction" },
                    { "qygsxx_xzxkxx", "getQygsJsXzxkxxAction" },
                    { "qygsxx_zscqczdjxx", "getQygsJsZscqczxxAction" },
                    { "qygsxx_xzcfxx", "getQygsJsXzcfxxAction" },
                    { "sfxzgsxx_gqdjxx", "getSfgsGqdjxxAction" },
                    { "sfxzgsxx_gdbgxx", "getSfgsGdbgxxAction" } };
            for (String[] section : qygsxxSections) {
                resultHtmlMap.put(section[0],
                        liaoningPageXml(wc, liaoningActionUrl(section[1], pripid, type)));
            }

            resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS);

        }
    }

    LOGGER.returnRedisResource();

    return resultHtmlMap;

}

/** Builds the URL of one detail action for the given {@code pripid} and {@code type}. */
private String liaoningActionUrl(String action, String pripid, String type) {
    return "http://gsxt.lngs.gov.cn/saicpub/entPublicitySC/entPublicityDC/" + action + ".action?pripid="
            + pripid + "&type=" + type;
}

/** Loads {@code url} through the client and returns the page markup, or null when no page came back. */
private String liaoningPageXml(WebClient wc, String url) throws Exception {
    HtmlPage page = wc.getPage(url);
    return null != page ? page.asXml() : null;
}

/** Same as the two-argument variant, but loads the request into the given window. */
private String liaoningPageXml(WebClient wc, WebWindow window, String url) throws Exception {
    HtmlPage page = wc.getPage(window, new WebRequest(new URL(url)));
    return null != page ? page.asXml() : null;
}

From source file:com.dalthed.tucan.scraper.SingleEventScraper.java

/**
 * Consumes the iterator and, for every element whose {@code td} descendants include one with
 * class {@code tbdata}, turns its {@code p} descendants into title/value pairs via
 * {@code crop} and hands them to the page adapter (when one is present).
 *
 * @param informationIterator elements to inspect; fully consumed by this call
 */
private void scrapeInformations(Iterator<Element> informationIterator) {

    while (informationIterator.hasNext()) {

        final Element candidate = informationIterator.next();
        final Elements cells = candidate.select("td");

        // Only elements carrying a 'tbdata' cell are of interest.
        if (cells == null || !cells.hasClass("tbdata")) {
            continue;
        }

        final ArrayList<String> titles = new ArrayList<String>();
        final ArrayList<String> values = new ArrayList<String>();

        for (Element paragraph : candidate.select("p")) {
            final String[] information = crop(paragraph.html());
            // Pairs with an empty value part are dropped.
            if (information[1].length() > 0) {
                titles.add(information[0]);
                values.add(information[1]);
            }
        }

        Log.i(LOG_TAG, "Informationscraper working");
        if (mPageAdapter != null) {
            Log.i(LOG_TAG, "InformationAdapter set");
            mPageAdapter.setAdapter(new TwoLinesAdapter(context, titles, values));
        }
    }
}

From source file:com.github.irshulx.Components.InputExtensions.java

/**
 * Renders one parsed HTML element into the editor: headings go through {@code RenderHeader},
 * while {@code p}, {@code div} and {@code blockquote} become a new text view appended after the
 * current children. Other tags known to {@code HtmlTag} are ignored.
 *
 * @param element element whose tag name selects the rendering path
 * @return always {@code null}
 */
@Override
public Node buildNodeFromHTML(Element element) {
    final HtmlTag tag = HtmlTag.valueOf(element.tagName().toLowerCase());

    if (tag == HtmlTag.h1 || tag == HtmlTag.h2 || tag == HtmlTag.h3) {
        RenderHeader(tag, element);
    } else if (tag == HtmlTag.p || tag == HtmlTag.div || tag == HtmlTag.blockquote) {
        final String innerHtml = element.html();
        // Insert at the end: the index equals the current number of children.
        final int insertAt = editorCore.getParentView().getChildCount();
        final TextView textView = insertEditText(insertAt, null, innerHtml);
        if (tag == HtmlTag.blockquote) {
            UpdateTextStyle(EditorTextStyle.BLOCKQUOTE, textView);
        }
        applyStyles(textView, element);
    }

    return null;
}

From source file:com.near.chimerarevo.fragments.PostFragment.java

/**
 * Adds the text of each plain paragraph to the post view. Paragraphs whose inner HTML starts
 * with an entity, an iframe or a comment/declaration, or that contain heading, list,
 * {@code pre} or table-row markup, are skipped. For the rest, images are handed to
 * {@code parseNormalImages} and removed, in-page ({@code #}) links lose their {@code href},
 * and the remaining markup is added as text when non-empty.
 *
 * @param ps paragraph elements to process
 */
private void parseParagraphs(Elements ps) {
    for (Element paragraph : ps) {
        // Serialize once; nothing mutates the paragraph while the filter is evaluated.
        final String markup = paragraph.html();
        final boolean excluded = markup.startsWith("&") || markup.startsWith("<iframe")
                || markup.startsWith("<!") || markup.contains("<h") || markup.contains("<ol")
                || markup.contains("<ul") || markup.contains("<pre") || markup.contains("<tr");
        if (excluded) {
            continue;
        }

        parseNormalImages(paragraph.select("img"));
        paragraph.select("img").remove();

        for (Element link : paragraph.getElementsByTag("a")) {
            if (link.attr("href").startsWith("#")) {
                link.removeAttr("href");
            }
        }

        // Re-serialize after the images and in-page hrefs were stripped.
        final String txt = paragraph.html().replace("<br />", "").replace("\n", "").trim();
        if (txt.length() > 0) {
            addText(txt, true, Typeface.DEFAULT);
        }
    }
}