Example usage for org.jsoup.select Elements first

List of usage examples for org.jsoup.select Elements first

Introduction

This page collects example usages of the first() method of org.jsoup.select.Elements.

Prototype

public Element first() 

Source Link

Document

Get the first matched element, or null if the selection is empty.

Usage

From source file:GIST.IzbirkomExtractor.TableExtractor.java

/**
     * Tests the row if it looks like the 1st row of a parsable table
     * @param row/*from w ww.  j a v  a  2s .c om*/
     * @return
     */
    private boolean isParsableTable(Element row) {

        Elements cells = row.getElementsByTag("td");

        /* number of columns should be 4 */
        if (cells.size() != 4)
            return false;

        /* look for number signs in 1st cell*/
        if (StringUtils.getLevenshteinDistance(cleanupUNICODE(cells.first().text()),
                " . -") < 3)
            return true;

        /* discard the table if any of the cells is empty */
        for (Element cell : cells) {
            if (cleanupUNICODE(cell.text()).isEmpty())
                return false;
        }

        /* 1st column should be a number */
        try {
            Integer.parseInt(cleanupUNICODE(cells.first().text()).trim());
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

From source file:net.acesinc.convergentui.ConvergentUIResponseFilter.java

/**
 * Composes the downstream response: every {@code div[data-loc]} element in the
 * downstream HTML has its inner HTML replaced with content fetched from the
 * location it names, and the resulting document is written to the response.
 * <p>
 * Per-element behaviour is driven by data attributes read below:
 * {@code data-loc} (location to fetch), {@code data-fragment-name} (optional
 * fragment to extract from the fetched document), {@code data-cache-name},
 * {@code data-disable-caching} and {@code data-fail-quietly}. When
 * {@code fail-quietly} is set, failures produce an empty
 * {@code <div class='cui-error'>} (or leave the element untouched when the
 * fetch itself throws) instead of a visible error message.
 * <p>
 * If the downstream body is empty nothing is written. If the document has no
 * replaceable elements the original body is written unchanged.
 *
 * @return always {@code null}
 */
@Override
public Object run() {

    String origBody = contentManager.getDownstreamResponse();
    if (origBody == null || origBody.isEmpty()) {
        return null;
    }

    String composedBody = null;
    log.trace("Response from downstream server: " + origBody);

    Document doc = Jsoup.parse(origBody);
    if (hasReplaceableElements(doc)) {
        log.debug("We have replaceable elements. Let's get em!");
        Elements elementsToUpdate = doc.select("div[data-loc]");
        for (Element e : elementsToUpdate) {
            StringBuilder content = new StringBuilder();
            String location = e.dataset().get("loc");
            String fragmentName = e.dataset().get("fragment-name");
            String cacheName = e.dataset().get("cache-name");
            // parseBoolean maps a missing (null) attribute to false, same as Boolean.valueOf
            boolean useCaching = !Boolean.parseBoolean(e.dataset().get("disable-caching"));
            boolean failQuietly = Boolean.parseBoolean(e.dataset().get("fail-quietly"));
            try {
                URL url = new URL(location);
                String protocol = url.getProtocol();
                String service = url.getHost();

                log.debug("Fetching content at location [ " + location + " ] with cacheName = [ " + cacheName
                        + " ]");

                try {
                    RequestContext context = RequestContext.getCurrentContext();
                    ContentResponse response = contentManager.getContentFromService(location, cacheName,
                            useCaching, context);

                    log.trace(response.toString());

                    if (!response.isError()) {
                        Object resp = response.getContent();
                        // instanceof is null-safe: a null payload is reported as "not text"
                        // instead of failing with a NullPointerException on resp.getClass()
                        if (resp instanceof String) {
                            String subContentResponse = (String) resp;
                            //TODO You better trust the source of your downstream HTML!
                            Document subDocument = Jsoup.parse(subContentResponse);

                            if (fragmentName != null) {
                                Elements fragments = subDocument
                                        .select("div[data-fragment-name=\"" + fragmentName + "\"]");

                                if (fragments != null && fragments.size() > 0) {
                                    if (fragments.size() == 1) {
                                        Element frag = fragments.first();

                                        //need to see if there are images that we need to replace the urls on
                                        Elements images = frag.select("img");
                                        for (Element i : images) {
                                            String src = i.attr("src");
                                            // only root-relative urls are rewritten; protocol-relative ("//") are left alone
                                            if (src.startsWith("/") && !src.startsWith("//")) {
                                                i.attr("src", "/cui-req://" + protocol + "://" + service + src);
                                            } //else what do we do about relative urls?
                                        }

                                        content.append(frag.toString());

                                    } else {
                                        // several matches: concatenate them as-is (no image url rewriting here)
                                        for (Element frag : fragments) {
                                            content.append(frag.toString()).append("\n\n");
                                        }
                                    }
                                } else {
                                    log.debug("Found no matching fragments for [ " + fragmentName + " ]");
                                    if (failQuietly) {
                                        content.append("<div class='cui-error'></div>");
                                    } else {
                                        content.append(
                                                "<span class='cui-error'>Failed getting content from remote service. Possible reason in reponse below</span>");
                                        content.append(subDocument.toString());
                                    }
                                }
                            } else {
                                //take the whole thing and cram it in there!
                                content.append(subDocument.toString());
                            }
                        } else {
                            //not text...
                            if (!failQuietly) {
                                content.append(
                                        "<span class='cui-error'>Failed getting content from remote service. Reason: content was not text</span>");
                            } else {
                                content.append("<div class='cui-error'></div>");
                            }
                        }

                    } else {
                        if (!failQuietly) {
                            content.append(
                                    "<span class='cui-error'>Failed getting content from remote service. Reason: "
                                            + response.getMessage() + "</span>");
                        } else {
                            content.append("<div class='cui-error'></div>");
                        }
                    }

                    //now append it to the page
                    if (!content.toString().isEmpty()) {
                        e.html(content.toString());
                    }
                } catch (Throwable t) {
                    if (!failQuietly) {
                        e.html("<span class='cui-error'>Failed getting content from remote service. Reason: "
                                + t.getMessage() + "</span>");
                    }
                    log.warn("Failed replacing content", t);
                }
            } catch (MalformedURLException ex) {
                log.warn("location was invalid: [ " + location + " ]", ex);
                if (!failQuietly) {
                    content.append(
                            "<span class='cui-error'>Failed getting content from remote service. Reason: data-loc was an invalid location.</span>");
                } else {
                    content.append("<div class='cui-error'></div>");
                }
                // The e.html(...) call for the normal path sits inside the try block and is
                // skipped for an invalid location, so the error markup must be applied here.
                e.html(content.toString());
            }

        }

        composedBody = doc.toString();
    } else {
        log.debug("Document has no replaeable elements. Skipping");
    }

    try {
        addResponseHeaders();
        if (composedBody != null && !composedBody.isEmpty()) {
            writeResponse(composedBody, getMimeType(RequestContext.getCurrentContext()));
        } else {
            writeResponse(origBody, getMimeType(RequestContext.getCurrentContext()));
        }
    } catch (Exception ex) {
        log.error("Error sending response", ex);

    }
    return null;
}

From source file:com.quarterfull.newsAndroid.NewsDetailFragment.java

/**
 * Shows a dialog for the item that was long-pressed inside the article WebView.
 * <p>
 * For an image hit the dialog is titled with the file-name part of the image
 * URL and shows the matching {@code <img>} element's {@code title}, or its
 * {@code alt} text when the title is empty. For a link hit the dialog shows
 * the text of the elements whose {@code href} contains the URL. Other hit
 * types, views that are not a WebView, and hits without a usable URL are
 * ignored.
 *
 * @param menu     unused by this implementation
 * @param v        the view the context menu is requested for
 * @param menuInfo unused by this implementation
 */
public void onCreateContextMenu(ContextMenu menu, View v, ContextMenu.ContextMenuInfo menuInfo) {
    if (!(v instanceof WebView)) {
        return;
    }
    WebView.HitTestResult result = ((WebView) v).getHitTestResult();
    if (result == null) {
        return;
    }
    int type = result.getType();

    Document htmldoc = Jsoup.parse(html);

    FragmentTransaction ft = getFragmentManager().beginTransaction();

    if (type == WebView.HitTestResult.IMAGE_TYPE
            || type == WebView.HitTestResult.SRC_IMAGE_ANCHOR_TYPE) {
        String imageUrl = result.getExtra();
        // getExtra() may be null; treat that like any other unsupported url
        if (imageUrl != null && (imageUrl.startsWith("http") || imageUrl.startsWith("file"))) {

            String imgsrcval = imageUrl.substring(imageUrl.lastIndexOf('/') + 1, imageUrl.length());
            Elements imgtag = htmldoc.getElementsByAttributeValueContaining("src", imageUrl);

            // first() yields null for an empty selection, so check up front
            // instead of catching the NullPointerException afterwards
            boolean imageFound = !imgtag.isEmpty();
            String imgtitle = imageFound ? imgtag.first().attr("title") : "";
            String imgaltval = imageFound ? imgtag.first().attr("alt") : "";

            URL mImageUrl;
            try {
                mImageUrl = new URL(imageUrl);
            } catch (MalformedURLException e) {
                return;
            }

            String title = imgsrcval;
            int titleIcon = android.R.drawable.ic_menu_gallery;
            String text = (imgtitle.isEmpty()) ? imgaltval : imgtitle;

            // Create and show the dialog.
            DialogFragment newFragment = NewsDetailImageDialogFragment.newInstanceImage(title,
                    titleIcon, text, mImageUrl);
            newFragment.show(ft, "menu_fragment_dialog");
        }
    } else if (type == WebView.HitTestResult.SRC_ANCHOR_TYPE) {
        String url = result.getExtra();
        if (url == null) {
            // nothing to look up; the attribute query below rejects a null value
            return;
        }
        URL mUrl;
        String text;
        try {
            Elements urltag = htmldoc.getElementsByAttributeValueContaining("href", url);
            text = urltag.text();
            mUrl = new URL(url);
        } catch (MalformedURLException e) {
            return;
        }

        // Create and show the dialog.
        DialogFragment newFragment = NewsDetailImageDialogFragment.newInstanceUrl(text,
                mUrl.toString());
        newFragment.show(ft, "menu_fragment_dialog");
    }
    //else if (type == WebView.HitTestResult.EMAIL_TYPE) { }
    //else if (type == WebView.HitTestResult.GEO_TYPE) { }
    //else if (type == WebView.HitTestResult.PHONE_TYPE) { }
    //else if (type == WebView.HitTestResult.EDIT_TEXT_TYPE) { }
}

From source file:com.gumtreescraper.scraper.GumtreeScraper.java

/**
 * Walks the paginated search result starting at {@code url} and appends one
 * {@link Gumtree} (with only its URL set) to {@code gumtrees} for every ad
 * accepted by {@code isOwner}.
 * <p>
 * Paging stops when a page has no "next" link, when the date of the last ad
 * on a page is rejected by {@code needToScrapeNextPage}, or when the same page
 * has failed three times in a row. Failures are printed and otherwise
 * swallowed, so the list may be incomplete.
 *
 * @param gumtrees list that receives the scraped ads
 * @param url      address of the first result page
 * @throws IOException declared for callers; the visible code reports failures
 *                     on standard output instead of propagating them
 */
public void scrapeWithJSoup(List<Gumtree> gumtrees, String url) throws IOException {

    // Upper bound on back-to-back failures for one page. Without it a page that
    // keeps failing (dead link, parser error) would be retried forever.
    final int maxConsecutiveFailures = 3;

    String nextPageUrl = url;
    boolean needContinue = true;
    int consecutiveFailures = 0;
    do {

        try {
            Document doc = Jsoup.connect(nextPageUrl).timeout(getTimeout() * 1000).userAgent("Mozilla")
                    .get();
            Elements adElements = doc.select("#srchrslt-adtable > li");
            int size = adElements.size();
            for (int i = 0; i < size; i++) {
                Element ad = adElements.get(i);

                if (!isOwner(ad)) {
                    continue;
                }

                Element linkElement = ad.select("h6.rs-ad-title > a").first();

                if (linkElement == null) {
                    System.out.print(ad);
                    continue;
                }

                String adUrl = linkElement.attr("href");
                Gumtree gumtree = new Gumtree();
                gumtree.setUrl(BASE_URL + adUrl);
                gumtrees.add(gumtree);

                // The date of the last ad on the page decides whether older pages are needed.
                // NOTE(review): this is skipped when the last ad was filtered out above — confirm intended.
                if (i == size - 1) { // last element
                    Elements adDateElements = ad.select("div.rs-ad-date");
                    if (adDateElements.isEmpty()) {
                        continue;
                    }

                    if (!needToScrapeNextPage(adDateElements.first().text().trim())) {
                        needContinue = false;
                    }
                }
            }

            Elements nextElements = doc.select("a.rs-paginator-btn.next");
            if (nextElements.isEmpty()) {
                break;
            }

            nextPageUrl = BASE_URL + nextElements.first().attr("href");
            System.out.println("next page: " + nextPageUrl);

            // the page was processed completely, so the next one starts with a clean slate
            consecutiveFailures = 0;
        } catch (Exception oex) {
            System.out.println(oex);
            consecutiveFailures++;
            if (consecutiveFailures >= maxConsecutiveFailures) {
                System.out.println("giving up on " + nextPageUrl + " after " + consecutiveFailures
                        + " failed attempts");
                break;
            }
        }
    } while (needContinue);
}

From source file:com.liato.bankdroid.banking.banks.coop.Coop.java

/**
 * Logs in and rebuilds the account list.
 * <p>
 * For every {@link AccountType} the type's page is fetched and parsed: the
 * page GUID and the min/max dates of the {@code dateFrom} field are stored in
 * {@code mTransactionParams}, and each {@code dt}/{@code dd} pair of the first
 * {@code .List} element containing "Saldo" becomes an {@link Account}. The
 * account whose name contains "disponibelt" supplies the bank's balance and
 * currency. Afterwards the refund summary service is queried and, if it
 * answers, two more accounts are appended.
 *
 * @throws LoginException      if the username or password is missing
 * @throws BankException       if a request or JSON parsing fails, or if no
 *                             account was found at all
 * @throws BankChoiceException declared by the overridden method; not thrown by
 *                             the statements visible here
 */
@Override
public void update() throws BankException, LoginException, BankChoiceException {
    super.update();
    if (username == null || password == null || username.length() == 0 || password.length() == 0) {
        throw new LoginException(res.getText(R.string.invalid_username_password).toString());
    }

    login();

    try {
        for (AccountType at : AccountType.values()) {
            response = urlopen.open(at.getUrl());
            Document d = Jsoup.parse(response);
            Elements historik = d.select("#historik section");
            // params are registered even when nothing can be extracted from the page
            TransactionParams params = new TransactionParams();
            mTransactionParams.put(at, params);
            if (historik != null && !historik.isEmpty()) {
                String data = historik.first().attr("data-controller");
                Matcher m = rePageGuid.matcher(data);
                if (m.find()) {
                    params.setPageGuid(m.group(1));
                }
            }
            Element date = d.getElementById("dateFrom");
            if (date != null) {
                params.setMinDate(date.hasAttr("min") ? date.attr("min") : null);
                params.setMaxDate(date.hasAttr("max") ? date.attr("max") : null);
            }
            Elements es = d.select(".List:contains(Saldo)");
            if (es != null && !es.isEmpty()) {
                Element balanceList = es.first();
                List<String> names = new ArrayList<String>();
                List<String> values = new ArrayList<String>();
                for (Element e : balanceList.select("dt")) {
                    names.add(e.text().replaceAll(":", "").trim());
                }
                for (Element e : balanceList.select("dd")) {
                    values.add(e.text().trim());
                }
                // pair names and values by position; surplus entries on either side are ignored
                for (int i = 0; i < Math.min(names.size(), values.size()); i++) {
                    Account a = new Account(names.get(i), Helpers.parseBalance(values.get(i)),
                            String.format("%s%d", at.getPrefix(), i));
                    a.setCurrency(Helpers.parseCurrency(values.get(i), "SEK"));
                    // Locale.ROOT keeps the match independent of the device locale
                    // (e.g. Turkish lower-cases 'I' to a dotless i, which would break it)
                    if (a.getName().toLowerCase(java.util.Locale.ROOT).contains("disponibelt")) {
                        a.setType(Account.REGULAR);
                        balance = a.getBalance();
                        setCurrency(a.getCurrency());
                    } else {
                        a.setType(Account.OTHER);
                    }

                    // every entry after the first is an alias of the first entry of this account type
                    if (i > 0) {
                        a.setAliasfor(String.format("%s%d", at.getPrefix(), 0));
                    }
                    accounts.add(a);
                }
            }
        }
    } catch (ClientProtocolException e) {
        e.printStackTrace();
        throw new BankException(e.getMessage());
    } catch (IOException e) {
        e.printStackTrace();
        throw new BankException(e.getMessage());
    }

    try {
        RefundSummaryRequest refsumReq = new RefundSummaryRequest(mUserId, mToken, APPLICATION_ID);
        HttpEntity e = new StringEntity(getObjectmapper().writeValueAsString(refsumReq));
        InputStream is = urlopen
                .openStream("https://www.coop.se/ExternalServices/RefundService.svc/RefundSummary", e, true);
        RefundSummaryResponse refsumResp = readJsonValue(is, RefundSummaryResponse.class);
        if (refsumResp != null && refsumResp.getRefundSummaryResult() != null) {
            Account a = new Account("terbring p ditt kort",
                    BigDecimal.valueOf(refsumResp.getRefundSummaryResult().getAccountBalance()), "refsummary");
            a.setCurrency("SEK");
            // the refund balance only becomes the headline balance when no regular account exists
            if (accounts.isEmpty()) {
                balance = a.getBalance();
                setCurrency(a.getCurrency());
            }
            accounts.add(a);
            // NOTE(review): unlike the account above, no currency is set on this one — confirm
            // that the Account default is the intended currency.
            a = new Account(
                    String.format("terbring fr %s", refsumResp.getRefundSummaryResult().getMonthName()),
                    BigDecimal.valueOf(refsumResp.getRefundSummaryResult().getTotalRefund()),
                    "refsummary_month");
            accounts.add(a);
        }
    } catch (JsonParseException e) {
        e.printStackTrace();
        throw new BankException(e.getMessage());
    } catch (ClientProtocolException e) {
        e.printStackTrace();
        throw new BankException(e.getMessage());
    } catch (IOException e) {
        e.printStackTrace();
        throw new BankException(e.getMessage());
    }

    if (accounts.isEmpty()) {
        throw new BankException(res.getText(R.string.no_accounts_found).toString());
    }
    super.updateComplete();
}

From source file:org.keycloak.testsuite.util.saml.ModifySamlResponseStepBuilder.java

/**
 * Handles a SAML POST-binding page: extracts the single SAMLResponse or
 * SAMLRequest form field, decodes it, runs it through the configured
 * transformer and builds the follow-up request carrying the transformed
 * document (plus the RelayState field, when the page has one).
 *
 * @param currentResponse the HTTP response holding the auto-submit form page
 * @return the request to send next, or {@code null} when the transformer
 *         returns {@code null}
 * @throws Exception if reading or decoding the page fails
 */
private HttpUriRequest handlePostBinding(CloseableHttpResponse currentResponse) throws Exception {
    assertThat(currentResponse, statusCodeIsHC(Status.OK));

    final String pageHtml = EntityUtils.toString(currentResponse.getEntity());
    assertThat(pageHtml, Matchers.containsString("SAML"));

    final org.jsoup.nodes.Document page = Jsoup.parse(pageHtml);
    final Elements responseInputs = page.select("input[name=SAMLResponse]");
    final Elements requestInputs = page.select("input[name=SAMLRequest]");
    final Elements formElements = page.select("form");
    final Elements relayStateInputs = page.select("input[name=RelayState]");

    // exactly one SAML field and exactly one form are expected on the page
    final int samlFieldCount = responseInputs.size() + requestInputs.size();
    assertThat("Checking uniqueness of SAMLResponse/SAMLRequest input field in the page", samlFieldCount,
            is(1));
    assertThat("Checking uniqueness of forms in the page", formElements, hasSize(1));

    // whichever of the two selections is non-empty holds the SAML document
    final Element samlInput;
    if (responseInputs.isEmpty()) {
        samlInput = requestInputs.first();
    } else {
        samlInput = responseInputs.first();
    }
    final Element postForm = formElements.first();

    final InputStream decodedStream = PostBindingUtil.base64DecodeAsStream(samlInput.val());
    final String samlXml = IOUtils.toString(decodedStream, GeneralConstants.SAML_CHARSET);
    IOUtils.closeQuietly(decodedStream);

    final String transformedXml = getTransformer().transform(samlXml);
    if (transformedXml == null) {
        return null;
    }

    // explicit overrides win over what the page itself declares
    String attributeName = this.targetAttribute;
    if (attributeName == null) {
        attributeName = samlInput.attr("name");
    }

    final List<NameValuePair> extraParameters = new LinkedList<>();
    if (!relayStateInputs.isEmpty()) {
        final String relayState = relayStateInputs.first().val();
        extraParameters.add(new BasicNameValuePair(GeneralConstants.RELAY_STATE, relayState));
    }

    URI destination = this.targetUri;
    if (destination == null) {
        destination = URI.create(postForm.attr("action"));
    }

    return createRequest(destination, attributeName, transformedXml, extraParameters);
}

From source file:fr.eolya.extraction.tika.TikaWrapper.java

/**
 * Reads the {@code content} attribute of the first {@code meta} element whose
 * {@code name} attribute matches {@code metaName}.
 *
 * @param doc      the parsed document to search
 * @param metaName the meta name, inserted verbatim into the selector
 * @return the attribute value, or {@code null} when no such element exists
 */
private String getMetaContent(Document doc, String metaName) {
    final Elements metaTags = doc.select("meta[name=" + metaName + "]");
    final boolean found = metaTags != null && metaTags.first() != null;
    return found ? metaTags.first().attr("content") : null;
}

From source file:net.kevxu.purdueassist.course.CatalogDetail.java

/**
 * Builds a {@link CatalogDetailEntry} for the current {@code subject}/{@code cnbr}
 * from a course detail page.
 * <p>
 * The page text is sliced by searching for fixed labels ("Levels:",
 * "Schedule Types:", "Offered By:", ...) and cutting at hard-coded offsets
 * from them. Any {@link StringIndexOutOfBoundsException} raised while slicing
 * is swallowed, so the returned entry holds only the fields that were set
 * before the first failed cut.
 *
 * @param document the parsed course detail page
 * @return the entry, possibly only partially filled
 * @throws HtmlParseException      if the text of a schedule type link cannot be
 *                                 converted to a {@code Type}
 * @throws CourseNotFoundException if the page has no course detail table
 * @throws IOException             declared; no statement visible in this method throws it
 */
private CatalogDetailEntry parseDocument(Document document)
        throws HtmlParseException, CourseNotFoundException, IOException {
    CatalogDetailEntry entry = new CatalogDetailEntry(subject, cnbr);
    // the detail table is identified by its summary attribute
    Elements tableElements = document.getElementsByAttributeValue("summary",
            "This table lists the course detail for the selected term.");
    if (tableElements.isEmpty() != true) {
        // get name
        try {
            // NOTE(review): first() returns null for an empty selection; the resulting
            // NullPointerException would not be caught by the handler below — confirm
            // these elements are always present.
            Element body = tableElements.first().select("tbody").first();
            String nameBlock = body.select("tr td.nttitle").first().text();
            // the name is what follows "<subject> <cnbr>" in the title cell
            String[] temp = nameBlock.split(subject.name() + " " + String.valueOf(cnbr));
            // skips 3 characters — presumably a " - " separator; TODO confirm
            String name = temp[temp.length - 1].substring(3);
            entry.setName(name);

            // get description
            // from here on body refers to the first .ntdefault element, not the tbody
            body = body.select(".ntdefault").first();
            String text = body.text();
            int split = text.indexOf("Levels:");
            String description = text.substring(0, split);
            // drops the first 20 characters — presumably a fixed prefix before the description; TODO confirm
            description = description.substring(20);
            entry.setDescription(description);

            // get levels
            int begin = split;
            int end = text.indexOf("Schedule Types:");
            // +8 skips the 7-character "Levels:" label and the character after it
            String levels = text.substring(begin + 8, end);
            temp = levels.split("[ ,]");
            List<String> lvs = new ArrayList<String>();
            for (String s : temp)
                if (!s.equals("")) {
                    lvs.add(s);
                }
            entry.setLevels(lvs);

            // get type and prerequisites
            List<Type> types = new ArrayList<Type>();
            List<String> preq = new ArrayList<String>();
            Elements parsing_A = body.select("a");
            for (Element e : parsing_A) {
                // links whose href contains "schd_in" (and no '%') are treated as schedule types
                if (e.attr("href").contains("schd_in") && !(e.attr("href").contains("%"))) {

                    try {
                        // the link text with spaces removed must match a Type constant
                        types.add(Type.valueOf(e.text().replace(" ", "")));
                    } catch (Exception exception) {
                        throw new HtmlParseException();
                    }
                } else if (e.attr("href").contains("sel_attr=")) {
                    // links whose href contains "sel_attr=" are collected as prerequisites
                    preq.add(e.text());
                }
            }
            if (types.size() > 0)
                entry.setType(types);
            if (preq.size() > 0)
                entry.setPrerequisites(preq);

            // get offered by
            begin = text.indexOf("Offered By:");
            end = text.indexOf("Department:");
            if (end < 0)
                end = text.indexOf("Course Attributes:");
            if (end > 0) {
                // +12 skips the 11-character label and the character after it;
                // NOTE(review): begin is not checked for -1 here — confirm the label is always present
                entry.setOfferedBy(text.substring(begin + 12, end - 1));
            }

            // get department
            begin = text.indexOf("Department:");
            if (begin > 0) {
                end = text.indexOf("Course Attributes:");
                // if "Course Attributes:" is absent, end is -1 and substring throws; the
                // handler below then skips campuses and restrictions as well
                entry.setDepartment((text.substring(begin + 12, end - 1)));
            }

            // get campus
            begin = text.indexOf("May be offered at any of the following campuses:");
            String campuses;
            // the campus list ends at the first of these labels that is present, tried in this order
            end = text.indexOf("Repeatable for Additional Credit:");
            if (end < 0)
                end = text.indexOf("Learning Objectives:");
            if (end < 0)
                end = text.indexOf("Restrictions:");
            if (end < 0)
                end = text.indexOf("Corequisites:");
            if (end < 0)
                end = text.indexOf("Prerequisites:");
            if (end < 0) {
                // no terminating label: take everything up to the end of the text
                campuses = text
                        .substring(begin + "May be offered at any of the following campuses:".length() + 5);
            } else {
                campuses = text.substring(
                        begin + "May be offered at any of the following campuses:".length() + 5, end - 1);
            }
            // campus names are separated by runs of four spaces in the extracted text
            temp = campuses.replace("    ", "#").split("#");
            List<String> camps = new ArrayList<String>();
            for (String s : temp) {
                if (s.length() > 1) {
                    camps.add(s);
                }

            }
            entry.setCampuses(camps);

            // get restrictions
            begin = text.indexOf("Restrictions:");
            end = text.indexOf("Corequisites:");
            if (end < 0)
                end = text.indexOf("Prerequisites:");
            if (begin > 0 && end < 0) {
                // no following section: restrictions run to the end of the text
                entry.setRestrictions(
                        text.substring(begin + "Restrictions:".length()).replace("      ", "\n"));
            } else if (begin > 0) {
                entry.setRestrictions(
                        text.substring(begin + "Restrictions:".length(), end).replace("      ", "\n"));
            }

        } catch (StringIndexOutOfBoundsException e) {
            // Deliberately ignored: a failed cut means the page lacks the expected
            // section, and the entry is returned with whatever was set so far.
            // no type, not available
            // System.out.println("-----------");
            // System.out.println("Error for cnbr = " + cnbr);
            // System.out.println("-----------");
        }
    } else {
        throw new CourseNotFoundException();
    }

    return entry;
}

From source file:com.adarshahd.indianrailinfo.donate.PNRStat.java

/**
 * Builds {@code mTableLayoutTrn}, a single-row table showing the train
 * details of the current PNR, and stores the same values as a space-separated
 * string in {@code mStrTrainDetails}.
 * <p>
 * If the page reports an invalid/flushed PNR or a server problem, a status
 * message is shown in {@code mFrameLayout} instead. The details are parsed
 * from {@code mPageResult} unless {@code mTrainDetails} already holds them for
 * the current PNR. When the expected table structure cannot be found the page
 * is logged and the method returns without building the layout.
 */
private void createTableLayoutTrnDtls() {
    if (mPageResult.contains("FLUSHED PNR / ") || mPageResult.contains("Invalid PNR")) {
        mTextViewPNRSts.setText("The PNR entered is either invalid or expired! Please check.");
        mFrameLayout.removeAllViews();
        mFrameLayout.addView(mTextViewPNRSts);
        return;
    }
    if (mPageResult.contains("Connectivity Failure") || mPageResult.contains("try again")) {
        mTextViewPNRSts.setText("Looks like server is busy or currently unavailable. Please try again later!");
        mFrameLayout.removeAllViews();
        mFrameLayout.addView(mTextViewPNRSts);
        return;
    }
    List<String> trainList;
    // NOTE(review): if getPNR() and mPNRNumber are Strings, != compares references,
    // not contents — confirm the types and switch to equals() if so.
    if (mTrainDetails == null || mTrainDetails.getPNR() != mPNRNumber) {
        Elements eleTrain = Jsoup.parse(mPageResult).select("table tr tr td:containsOwn(Train Number)");

        // Walk three levels up from the "Train Number" cell to the element that
        // holds all rows; any missing link means the page is not what we expect.
        Element rowContainer = eleTrain.first();
        for (int level = 0; rowContainer != null && level < 3; level++) {
            rowContainer = rowContainer.parent();
        }
        if (rowContainer == null) {
            Log.i("PNRStat", mPageResult);
            return;
        }
        Iterator<Element> iteTrain = rowContainer.getElementsByTag("tr").iterator();

        trainList = new ArrayList<String>();
        //Get the third row for train details
        for (int skipped = 0; skipped < 2; skipped++) {
            if (!iteTrain.hasNext()) {
                // fewer rows than expected: treat like any other unparsable page
                Log.i("PNRStat", mPageResult);
                return;
            }
            iteTrain.next();
        }
        if (iteTrain.hasNext()) {
            Elements cells = iteTrain.next().select("td");
            // columns of the details row that are shown; 3 and 4 are skipped
            final int[] detailColumns = { 0, 1, 2, 5, 6, 7 };
            if (cells.size() <= detailColumns[detailColumns.length - 1]) {
                Log.i("PNRStat", mPageResult);
                return;
            }
            for (int column : detailColumns) {
                trainList.add(cells.get(column).text());
            }
        }
        mTrainDetails = new TrainDetails(trainList, mPNRNumber);
    } else {
        trainList = mTrainDetails.getTrainDetails();
    }
    mTableLayoutTrn = new TableLayout(mActivity);
    mTableLayoutTrn.setLayoutParams(new FrameLayout.LayoutParams(ViewGroup.LayoutParams.MATCH_PARENT,
            ViewGroup.LayoutParams.WRAP_CONTENT));
    TableRow row = new TableRow(mActivity);
    row.setLayoutParams(new FrameLayout.LayoutParams(ViewGroup.LayoutParams.MATCH_PARENT,
            ViewGroup.LayoutParams.WRAP_CONTENT));
    StringBuilder details = new StringBuilder();
    for (String list : trainList) {
        TextView tv = new TextView(mActivity);
        tv.setText(list);
        tv.setPadding(10, 10, 10, 10);
        tv.setTextAppearance(mActivity, android.R.style.TextAppearance_DeviceDefault_Small);
        row.addView(tv);
        details.append(list).append(" ");
    }
    mStrTrainDetails = details.toString();
    row.setBackgroundResource(R.drawable.card_background);
    row.setGravity(Gravity.CENTER_HORIZONTAL | Gravity.CENTER_VERTICAL);
    mTableLayoutTrn.addView(row);
}

From source file:cn.edu.hfut.dmic.contentextractor.ContentExtractor.java

/**
 * metaTitle?metaTitle,metaTitle??????title
 *
 * @param contentElement/*from   w  w w .  j  a  v  a  2 s. co m*/
 * @return
 * @throws Exception
 */
protected String getTitle(final Element contentElement) throws Exception {
    final ArrayList<Element> titleList = new ArrayList<Element>();
    final ArrayList<Double> titleSim = new ArrayList<Double>();
    final String metaTitle = getText(doc.title().trim());
    if (!metaTitle.isEmpty()) {
        doc.body().traverse(new NodeVisitor() {
            @Override
            public void head(Node node, int i) {
                if (node instanceof Element) {
                    Element tag = (Element) node;
                    String tagName = tag.tagName();
                    if (Pattern.matches("h[1-6]", tagName)) {
                        String title = tag.text().trim();
                        double sim = strSim(title, metaTitle);
                        titleSim.add(sim);
                        titleList.add(tag);
                    }
                }
            }

            @Override
            public void tail(Node node, int i) {
            }
        });
        int index = titleSim.size();
        if (index >= 0) {
            double maxScore = 0;
            int maxIndex = -1;
            for (int i = 0; i < index; i++) {
                double score = (i + 1) * titleSim.get(i);
                if (score > maxScore) {
                    maxScore = score;
                    maxIndex = i;
                }
            }

            if (maxIndex == -1 || titleSim.get(maxIndex) < 0.3) {
                String title = getText(metaTitle);
                if (!title.endsWith("") && title.length() > 7) {
                    return title;
                }
                Collections.sort(titleList, new Comparator<Element>() {
                    @Override
                    public int compare(Element o1, Element o2) {
                        int len1 = 1;
                        int len2 = 1;
                        if (o1.text().replaceAll("[^\\u4e00-\\u9fa5]", "").length() > 26
                                || o1.text().replaceAll("[^\\u4e00-\\u9fa5]", "").length() < 7) {
                            len1 = 0;
                        }
                        if (o2.text().replaceAll("[^\\u4e00-\\u9fa5]", "").length() > 26
                                || o2.text().replaceAll("[^\\u4e00-\\u9fa5]", "").length() < 7) {
                            len2 = 0;
                        }
                        if (len1 == len2) {
                            return o1.tagName().charAt(1) - o2.tagName().charAt(1);
                        }
                        return len2 - len1;
                    }
                });
                return getText(titleList.get(0).text());
            }
            return titleList.get(maxIndex).text();
        }
    }

    /**
     * ?
     */
    Elements titles = doc.body().select("*[id^=title],*[id$=title],*[class^=title],*[class$=title]");
    if (titles.size() > 0) {
        String title = titles.first().text();
        if (title.length() > 5 && title.length() < 40) {
            return titles.first().text();
        }
    }
    try {
        return getTitleByEditDistance(contentElement);
    } catch (Exception ex) {
        throw new Exception("title not found");
    }

}