Example usage for org.jsoup.nodes Element html

List of usage examples for org.jsoup.nodes Element html

Introduction

On this page you can find example usages of org.jsoup.nodes Element html.

Prototype

public String html() 

Source Link

Document

Retrieves the element's inner HTML.

Usage

From source file:net.pixomania.crawler.W3C.parser.rules.principalAuthors.PrincipalAuthorsRule1.java

/**
 * Collects the people listed in the {@code <dd>} entries that follow a
 * "Principal Author" {@code <dt>} heading.
 *
 * <p>Each {@code <dd>} is split on commas; every fragment is parsed into a {@link Person}
 * by {@code NameParser}, and any links inside the fragment are attached either as the
 * e-mail address (href contains {@code @}) or as a website.
 *
 * @param url the specification URL, used only in the warning log message
 * @param doc the parsed specification document
 * @return the parsed people, or {@code null} when no matching entries exist or none of
 *         them could be parsed
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements editors = doc.select("dt:contains(Principal Author) ~ dd");
    if (editors.size() == 0)
        return null;

    boolean skip = false;
    for (Element editor : editors) {
        // The sibling selector also matches <dd>s that belong to later, unrelated <dt>
        // headings; start skipping as soon as such a heading is seen.
        Element prev = editor.previousElementSibling();
        if (prev.tagName().equals("dt")) {
            if (!prev.text().trim().toLowerCase().startsWith("principal author")) {
                skip = true;
            }
        }

        if (skip) {
            // Stop skipping once the element after this one is a "Principal Author" heading.
            Element next = editor.nextElementSibling();
            if (next != null && next.text().trim().toLowerCase().startsWith("principal author")) {
                skip = false;
            }
            continue;
        }

        for (String split : editor.html().split(",")) {
            if (split.isEmpty()) {
                continue;
            }

            // Lower-case once instead of once per prefix check.
            String lowered = split.toLowerCase();
            if (lowered.startsWith("(in alphabetic")
                    || lowered.startsWith("see acknowl")
                    || lowered.startsWith("the w3") || lowered.startsWith("(see ac")
                    || lowered.startsWith("see participants")
                    || lowered.contains("note:")) {
                Log.log("warning", "Spec " + url + " may refer to a different section!");
                continue;
            }
            if (split.equals("WHATWG:") || split.equals("W3C:")) {
                continue;
            }

            Document newdoc = Jsoup.parse(split.replace("\n", ""));
            Person result = NameParser.parse(newdoc.text());
            if (result == null) {
                continue;
            }

            // Run the selector once and read each href once, rather than re-selecting
            // on every access.
            for (Element link : newdoc.select("a")) {
                String href = link.attr("href");
                if (href.isEmpty()) {
                    continue;
                }
                if (href.contains("@")) {
                    result.setEmail(href.replace("mailto:", ""));
                } else {
                    result.addWebsite(href);
                }
            }

            editorList.add(result);
        }
    }

    if (editorList.size() == 0)
        return null;

    return editorList;
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parses a "Nachrichten zum Tag" ("daily news") table from an Untis schedule.
 *
 * <p>Every row except the one containing the "Nachrichten zum Tag" caption becomes one
 * message; the decoded contents of the row's cells are joined with newlines. Rows without
 * any {@code <td>} cell are skipped.
 *
 * @param table the <code>table</code>-Element to be parsed
 * @param day   the {@link SubstitutionScheduleDay} where the messages should be stored
 */
private void parseMessages(Element table, SubstitutionScheduleDay day) {
    Elements zeilen = table.select("tr:not(:contains(Nachrichten zum Tag))");
    for (Element zeile : zeilen) {
        Elements spalten = zeile.select("td");
        if (spalten.isEmpty()) {
            // Nothing to join; previously this case failed on substring(1) of an empty string.
            continue;
        }

        StringBuilder info = new StringBuilder();
        boolean first = true;
        for (Element spalte : spalten) {
            // Separator goes between cells only, so an empty leading cell still yields
            // a leading newline exactly as before.
            if (!first) {
                info.append('\n');
            }
            first = false;
            info.append(TextNode.createFromEncoded(spalte.html(), null).getWholeText());
        }
        day.addMessage(info.toString());
    }
}

From source file:biz.shadowservices.DegreesToolbox.DataFetcher.java

/**
 * Refreshes the locally cached account data by logging in to the 2degrees customer portal
 * and scraping the account summary (plus, for pre-pay accounts, the value-pack page).
 *
 * <p>Unless {@code force} is set, the update is skipped when a previous login failed or when
 * the stored preferences disallow an automatic update on the current connection.
 *
 * @param context Android context used for preferences, connectivity checks and logging
 * @param force   {@code true} to bypass the automatic-update preference checks
 * @return the outcome of the update attempt, or {@code null} when fetching the login page
 *         yielded no content
 */
public FetchResult updateData(Context context, boolean force) {
    //Open database
    DBOpenHelper dbhelper = new DBOpenHelper(context);
    SQLiteDatabase db = dbhelper.getWritableDatabase();

    // The handle must be released on every exit path, including the early "skip" returns.
    try {
        // check for internet connectivity
        try {
            if (!isOnline(context)) {
                Log.d(TAG, "We do not seem to be online. Skipping Update.");
                return FetchResult.NOTONLINE;
            }
        } catch (Exception e) {
            exceptionReporter.reportException(Thread.currentThread(), e, "Exception during isOnline()");
        }
        SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(context);
        if (!force) {
            try {
                if (sp.getBoolean("loginFailed", false)) {
                    Log.d(TAG, "Previous login failed. Skipping Update.");
                    DBLog.insertMessage(context, "i", TAG, "Previous login failed. Skipping Update.");
                    return FetchResult.LOGINFAILED;
                }
                if (!sp.getBoolean("autoupdates", true)) {
                    Log.d(TAG, "Automatic updates not enabled. Skipping Update.");
                    DBLog.insertMessage(context, "i", TAG, "Automatic updates not enabled. Skipping Update.");
                    return FetchResult.NOTALLOWED;
                }
                if (!isBackgroundDataEnabled(context) && sp.getBoolean("obeyBackgroundData", true)) {
                    Log.d(TAG, "Background data not enabled. Skipping Update.");
                    DBLog.insertMessage(context, "i", TAG, "Background data not enabled. Skipping Update.");
                    return FetchResult.NOTALLOWED;
                }
                if (!isAutoSyncEnabled() && sp.getBoolean("obeyAutoSync", true)
                        && sp.getBoolean("obeyBackgroundData", true)) {
                    Log.d(TAG, "Auto sync not enabled. Skipping Update.");
                    DBLog.insertMessage(context, "i", TAG, "Auto sync not enabled. Skipping Update.");
                    return FetchResult.NOTALLOWED;
                }
                if (isWifi(context) && !sp.getBoolean("wifiUpdates", true)) {
                    Log.d(TAG, "On wifi, and wifi auto updates not allowed. Skipping Update");
                    DBLog.insertMessage(context, "i", TAG,
                            "On wifi, and wifi auto updates not allowed. Skipping Update");
                    return FetchResult.NOTALLOWED;
                } else if (!isWifi(context)) {
                    Log.d(TAG, "We are not on wifi.");
                    if (!isRoaming(context) && !sp.getBoolean("2DData", true)) {
                        Log.d(TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update.");
                        DBLog.insertMessage(context, "i", TAG,
                                "Automatic updates on 2Degrees data not enabled. Skipping Update.");
                        return FetchResult.NOTALLOWED;
                    } else if (isRoaming(context) && !sp.getBoolean("roamingData", false)) {
                        Log.d(TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update.");
                        DBLog.insertMessage(context, "i", TAG,
                                "Automatic updates on roaming mobile data not enabled. Skipping Update.");
                        return FetchResult.NOTALLOWED;
                    }
                }
            } catch (Exception e) {
                // Best effort: a failure while evaluating the preferences does not block the update.
                exceptionReporter.reportException(Thread.currentThread(), e,
                        "Exception while finding if to update.");
            }
        } else {
            Log.d(TAG, "Update Forced");
        }

        try {
            return fetchAndCacheAccountData(context, sp, db);
        } catch (ClientProtocolException e) {
            DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage());
            return FetchResult.NETWORKERROR;
        } catch (IOException e) {
            DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage());
            return FetchResult.NETWORKERROR;
        }
    } finally {
        db.close();
    }
}

/**
 * Logs in, scrapes the account pages into the "cache" table and records the update time.
 *
 * @return the fetch outcome, or {@code null} when the login page request returned no content
 */
private FetchResult fetchAndCacheAccountData(Context context, SharedPreferences sp, SQLiteDatabase db)
        throws IOException {
    String username = sp.getString("username", null);
    String password = sp.getString("password", null);
    if (username == null || password == null) {
        DBLog.insertMessage(context, "i", TAG, "Username or password not set.");
        return FetchResult.USERNAMEPASSWORDNOTSET;
    }

    // Find the URL of the page to send login data to.
    Log.d(TAG, "Finding Action. ");
    HttpGetter loginPageGet = new HttpGetter("https://secure.2degreesmobile.co.nz/web/ip/login");
    String loginPageString = loginPageGet.execute();
    if (loginPageString == null) {
        return null;
    }

    Document loginPage = Jsoup.parse(loginPageString,
            "https://secure.2degreesmobile.co.nz/web/ip/login");
    Element loginForm = loginPage.getElementsByAttributeValue("name", "loginFrm").first();
    String loginAction = loginForm.attr("action");

    // Send login form
    List<NameValuePair> loginValues = new ArrayList<NameValuePair>();
    loginValues.add(new BasicNameValuePair("externalURLRedirect", ""));
    loginValues.add(new BasicNameValuePair("hdnAction", "login_userlogin"));
    loginValues.add(new BasicNameValuePair("hdnAuthenticationType", "M"));
    loginValues.add(new BasicNameValuePair("hdnlocale", ""));
    loginValues.add(new BasicNameValuePair("userid", username));
    loginValues.add(new BasicNameValuePair("password", password));
    Log.d(TAG, "Sending Login ");
    HttpPoster sendLoginPoster = new HttpPoster(loginAction, loginValues);

    // Parse result
    String loginResponse = sendLoginPoster.execute();
    Document loginResponseParsed = Jsoup.parse(loginResponse);

    // Determine if this is a pre-pay or post-paid account.
    boolean postPaid = loginResponseParsed
            .getElementById("p_CustomerPortalPostPaidHomePage_WAR_customerportalhomepage") != null;
    Log.d(TAG, postPaid ? "Post-paid account." : "Pre-pay account or no account.");

    String homepageUrl = postPaid
            ? "https://secure.2degreesmobile.co.nz/group/ip/postpaid"
            : "https://secure.2degreesmobile.co.nz/group/ip/home";
    HttpGetter homepageGetter = new HttpGetter(homepageUrl);
    String homepageHTML = homepageGetter.execute();
    Document homePage = Jsoup.parse(homepageHTML);

    Element accountSummary = homePage.getElementById("accountSummary");
    if (accountSummary == null) {
        Log.d(TAG, "Login failed.");
        return FetchResult.LOGINFAILED;
    }
    db.delete("cache", "", null);

    // An earlier version also parsed the post-paid "tableBillSummary" table (amount and
    // payment-due date per row); it was dropped because those details weren't that useful.

    // The wording of the expiry note differs between account types; compile the pattern once.
    Pattern expiryPattern = postPaid
            ? Pattern.compile("\\(([\\d\\.]*) ?\\w*? ?will expire on (.*)\\)")
            : Pattern.compile("\\(([\\d\\.]*) ?\\w*? ?expiring on (.*)\\)");

    Element accountSummaryTable = accountSummary.getElementsByClass("tableAccountSummary").first();
    Elements rows = accountSummaryTable.getElementsByTag("tr");
    for (Element row : rows) {
        cacheAccountSummaryRow(db, row, expiryPattern);
    }

    if (!postPaid) {
        cacheValuePackDetails(context, db);
    }

    SharedPreferences.Editor prefedit = sp.edit();
    Date now = new Date();
    prefedit.putString("updateDate", DateFormatters.ISO8601FORMAT.format(now));
    prefedit.putBoolean("loginFailed", false);
    prefedit.putBoolean("networkError", false);
    prefedit.commit();
    DBLog.insertMessage(context, "i", TAG, "Update Successful");
    return FetchResult.SUCCESS;
}

/** Parses one row of the account summary table and inserts it into the "cache" table. */
private void cacheAccountSummaryRow(SQLiteDatabase db, Element row, Pattern expiryPattern) {
    Double value;
    String units;
    Element amount = row.getElementsByClass("tableBillamount").first();
    if (amount == null) {
        // Row has no amount cell: store it without a value.
        value = null;
        units = null;
    } else {
        String[] amountParts = amount.html().split("&nbsp;", 2);
        if (amountParts[0].contains("Included") || amountParts[0].equals("All You Need")
                || amountParts[0].equals("Unlimited Text*")) {
            value = Values.INCLUDED;
        } else {
            try {
                value = Double.parseDouble(amountParts[0]);
            } catch (NumberFormatException e) {
                exceptionReporter.reportException(Thread.currentThread(), e, "Decoding value.");
                value = 0.0;
            }
        }
        // An amount without a "&nbsp;" separator has no units part.
        units = amountParts.length > 1 ? amountParts[1] : null;
    }

    Element details = row.getElementsByClass("tableBilldetail").first();
    String name = details.getElementsByTag("strong").first().text();
    Element expires = details.getElementsByTag("em").first();
    String expiresDetails = "";
    if (expires != null) {
        expiresDetails = expires.text();
    }
    Log.d(TAG, expiresDetails);

    Double expiresValue = null;
    String expiresDate = null;
    Matcher matcher = expiryPattern.matcher(expiresDetails);
    if (matcher.find()) {
        // group 1: amount that expires, group 2: expiry date text
        try {
            expiresValue = Double.parseDouble(matcher.group(1));
        } catch (NumberFormatException e) {
            expiresValue = null;
        }
        String expiresDateString = matcher.group(2);
        if (expiresDateString != null && expiresDateString.length() > 0) {
            try {
                Date expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString);
                expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj);
            } catch (java.text.ParseException e) {
                Log.d(TAG, "Could not parse date: " + expiresDateString);
            }
        }
    }

    ContentValues values = new ContentValues();
    values.put("name", name);
    values.put("value", value);
    values.put("units", units);
    values.put("expires_value", expiresValue);
    values.put("expires_date", expiresDate);
    db.insert("cache", "value", values);
}

/** Fetches the pre-pay value-pack page and annotates matching cached rows with plan details. */
private void cacheValuePackDetails(Context context, SQLiteDatabase db) throws IOException {
    Log.d(TAG, "Getting Value packs...");
    // Find value packs
    HttpGetter valuePacksPageGet = new HttpGetter(
            "https://secure.2degreesmobile.co.nz/group/ip/prevaluepack");
    String valuePacksPageString = valuePacksPageGet.execute();
    if (valuePacksPageString == null) {
        return;
    }

    Document valuePacksPage = Jsoup.parse(valuePacksPageString);
    Elements enabledPacks = valuePacksPage.getElementsByClass("yellow");
    for (Element enabledPack : enabledPacks) {
        Element offerNameElemt = enabledPack
                .getElementsByAttributeValueStarting("name", "offername").first();
        if (offerNameElemt == null) {
            continue;
        }
        String offerName = offerNameElemt.val();
        DBLog.insertMessage(context, "d", "", "Got element: " + offerName);
        ValuePack[] packs = Values.valuePacks.get(offerName);
        if (packs == null) {
            DBLog.insertMessage(context, "d", "",
                    "Offer name: " + offerName + " not matched.");
            continue;
        }
        for (ValuePack pack : packs) {
            ContentValues values = new ContentValues();
            values.put("plan_startamount", pack.value);
            values.put("plan_name", offerName);
            DBLog.insertMessage(context, "d", "",
                    "Pack " + pack.type.id + " start value set to " + pack.value);
            // Bind the id instead of splicing it into the SQL text.
            db.update("cache", values, "name = ?", new String[] { String.valueOf(pack.type.id) });
        }
    }
}

From source file:net.vexelon.mobileops.GLBClient.java

/**
 * Requests the balance page and returns the HTML fragments that describe the current bill
 * and the outstanding amount.
 *
 * <p>Fragments are prepended to the result, so the current-bill fragment (added last) comes
 * first. The response's reported content length is added to the downloaded-bytes counter
 * whether or not parsing succeeds.
 *
 * @return the concatenated inner HTML of the matched elements; empty when nothing matched
 * @throws HttpClientException if the request fails, the status is not 200 OK, or the
 *                             response body cannot be read
 */
public String getCurrentBalance() throws HttpClientException {

    StringBuilder builder = new StringBuilder(100);
    HttpResponse resp;
    long bytesCount = 0;
    try {
        // The timestamp query string makes every request URL unique.
        String url = HTTP_MYTELENOR + GLBRequestType.GET_BALANCE.getPath();
        url += '?';
        url += new Date().getTime();

        HttpGet httpGet = new HttpGet(url);
        resp = httpClient.execute(httpGet, httpContext);
    } catch (Exception e) {
        throw new HttpClientException("Client protocol error!" + e.getMessage(), e);
    }

    StatusLine status = resp.getStatusLine();

    if (status.getStatusCode() != HttpStatus.SC_OK)
        throw new HttpClientException(status.getReasonPhrase(), status.getStatusCode());

    try {
        HttpEntity entity = resp.getEntity();
        // bytes downloaded
        bytesCount = Math.max(entity.getContentLength(), 0);

        // Close the content stream explicitly once parsing is done (or has failed);
        // closing an already-closed stream is harmless.
        Document doc;
        java.io.InputStream content = entity.getContent();
        try {
            doc = Jsoup.parse(content, RESPONSE_ENCODING, "");
        } finally {
            content.close();
        }

        // period bill
        Elements outstanding = doc.select("#outstanding-amount");
        if (outstanding.size() > 0) {
            for (Element el : outstanding.get(0).select("div")) {
                String elClass = el.className();
                if (elClass.contains("custme-select") || elClass.equalsIgnoreCase("history")) {
                    builder.insert(0, el.html());
                }
            }
        }

        // current bill
        Elements prices = doc.select("#bars-wrapper .p-price");
        if (prices.size() > 0) {
            builder.insert(0, prices.get(0).html());
        }

        return builder.toString();

    } catch (ClientProtocolException e) {
        throw new HttpClientException("Client protocol error!" + e.getMessage(), e);
    } catch (IOException e) {
        throw new HttpClientException("Client error!" + e.getMessage(), e);
    } finally {
        addDownloadedBytesCount(bytesCount);
    }
}

From source file:cn.edu.hfut.dmic.contentextractor.ContentExtractor.java

/**
 * ??:/*from  w  w  w. j  a  v  a 2  s . com*/
 * 1. ???
 * 2. ???????
 * 3. ??
 * 4. ?? ??
 * 5. ?
 *
 * @return
 * @throws XpathSyntaxErrorException
 */
private String getAuthor() throws XpathSyntaxErrorException {
    String author = "";
    if (StringUtils.isBlank(srcTime)) {
        author = getAuthor(doc.body().html());
        return author;
    }
    Element cur = doc.body().select("*:containsOwn(" + srcTime + ")").first();
    if (cur == null) {
        LOG.warn("?srcTime=" + srcTime);
        author = getAuthor(doc.body().html());
        return author;
    }

    if (!noText(cur)) {
        String arr[] = cur.html().split(srcTime);
        for (String text : arr) {
            author = getShortText(text);
            if (!StringUtils.isBlank(author))
                return author;
        }
    }
    Element parent = cur.parent();
    while (parent != null && noText(parent)) {
        cur = parent;
        parent = parent.parent();
    }
    author = getAuthor(parent.html());
    if (!StringUtils.isBlank(author))
        return author;

    Element pre = cur.previousElementSibling();
    while (pre != null && noText(pre)) {
        pre = pre.previousElementSibling();
    }
    if (pre != null) {
        author = getShortText(pre.text());
    }
    if (!StringUtils.isBlank(author))
        return author;
    Element next = cur.nextElementSibling();
    while (next != null && noText(next)) {
        next = next.nextElementSibling();
    }
    if (next != null) {
        author = getShortText(next.text());
    }
    if (!StringUtils.isBlank(author))
        return author;

    author = getShortText(parent.html().replace(srcTime, " "));
    if (!StringUtils.isBlank(author))
        return author;

    author = getAuthor(doc.body().html());
    if (StringUtils.isBlank(author)) {
        return author_bak;
    }
    return author;
}

From source file:com.weavers.duqhan.business.impl.ProductServiceImpl.java

/**
 * Scrapes the product page behind every pending temp link and stores the product together
 * with its categories, properties, per-SKU prices and images.
 *
 * <p>For each bean the matching link row is loaded; only rows whose status is {@code 0} are
 * processed. The outcome is written to the link row and as text to the bean:
 * <ul>
 * <li>link status {@code 1}, "Success" - the product was stored</li>
 * <li>link status {@code 2}, "criteria mismatch" - votes, stars or feedback below the limits</li>
 * <li>link status {@code 3}, "Product exsist" - a product with this external link already exists</li>
 * <li>link status {@code 4}, "Failure" - an exception occurred while scraping or saving</li>
 * <li>"Page not found" - the {@code #j-detail-page} container is missing (link row untouched)</li>
 * <li>"Link duplicate" - the link row is missing or its status is not {@code 0}</li>
 * </ul>
 *
 * <p>The method pauses 30-59 seconds before each page request. If the thread is interrupted
 * during that pause, the interrupt flag is restored and the rest of the batch is abandoned.
 *
 * @param statusBeans beans identifying the temp links to process; each processed bean gets
 *                    its status text set
 */
@Override
public void loadTempProducts(List<StatusBean> statusBeans) {
    boolean isSuccess = true;
    String startDate = new Date().toString();
    Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE,
            "(==I==)DATE: " + startDate + "Store product details in temp product table start.....");
    try {
        String status = "";
        for (StatusBean statusBean : statusBeans) {
            status = "Link duplicate";
            Temtproductlinklist temtproductlinklist = temtproductlinklistDao.loadById(statusBean.getId());
            if (temtproductlinklist != null && temtproductlinklist.getStatus() == 0) {
                Product testProduct = productDao.getProductByExternelLink(temtproductlinklist.getLink());
                if (testProduct == null) {
                    String value = "";
                    Elements detailMain;
                    Elements detailSub;
                    Elements specifics;
                    double votes = 0.0;
                    double stars = 0.0;
                    double feedback = 0.0;
                    String url = temtproductlinklist.getLink();
                    try {
                        testProduct = new Product();
                        Product savedTestProduct;

                        //=================== Random sleep START ===================//
                        // Random pause of 30-59 seconds before each page request.
                        TimeUnit.SECONDS.sleep(30 + new Random().nextInt(30));
                        //=================== Random sleep END =====================//

                        Document doc = Jsoup.connect(url).get();
                        detailMain = doc.select("#j-detail-page");
                        if (!detailMain.isEmpty()) {

                            //=================== Criteria Block START==================//
                            detailMain = doc.select(".rantings-num");
                            if (!detailMain.isEmpty()) {
                                votes = Double.valueOf(detailMain.text().split(" votes")[0].split("\\(")[1]);
                            }
                            detailMain = doc.select(".percent-num");
                            if (!detailMain.isEmpty()) {
                                stars = Double.valueOf(detailMain.text());
                            }
                            detailMain = doc.select("ul.ui-tab-nav li[data-trigger='feedback'] a");
                            if (!detailMain.isEmpty()) {
                                feedback = Double.valueOf(detailMain.text().split("\\(")[1].split("\\)")[0]);
                            }
                            //=================== Criteria Block END==================//

                            if (votes > 10.0 && stars > 4.0 && feedback > 4.0) {
                                // The product name doubles as the description.
                                String productName = doc.select(".detail-wrap .product-name").text();
                                testProduct.setName(productName);
                                testProduct.setDescription(productName);
                                testProduct.setExternalLink(url);
                                testProduct.setVendorId(1L);

                                //=================== Packaging block START==================//
                                // Every dimension defaults to 1.0 when it is missing or unparsable.
                                Double weight = 1.0;
                                Double width = 1.0;
                                Double height = 1.0;
                                Double length = 1.0;
                                detailMain = doc.select(
                                        "div#j-product-desc div.pnl-packaging-main ul li.packaging-item");
                                for (Element element : detailMain) {
                                    String packagingTitle = element.select("span.packaging-title").text();
                                    String packagingDesc = element.select("span.packaging-des").text();
                                    if (packagingTitle.trim().equals("Package Weight:")) {
                                        // Reduce the description to its space-separated numeric tokens.
                                        String[] numbers = packagingDesc.replaceAll("[^.?0-9]+", " ").trim()
                                                .split(" ");
                                        try {
                                            weight = Double.parseDouble(numbers[0]);
                                        } catch (NumberFormatException | IndexOutOfBoundsException e) {
                                            weight = 1.0;
                                        }
                                        System.out.println("weight == " + weight);
                                    } else if (packagingTitle.trim().equals("Package Size:")) {
                                        String[] numbers = packagingDesc.replaceAll("[^.?0-9]+", " ").trim()
                                                .split(" ");
                                        try {
                                            width = Double.parseDouble(numbers[0]);
                                            height = Double.parseDouble(numbers[1]);
                                            length = Double.parseDouble(numbers[2]);
                                        } catch (NumberFormatException | IndexOutOfBoundsException e) {
                                            // A partial parse is discarded: all three fall back together.
                                            width = 1.0;
                                            height = 1.0;
                                            length = 1.0;
                                        }
                                        System.out.println("width == " + width);
                                        System.out.println("height == " + height);
                                        System.out.println("length == " + length);
                                    }
                                }
                                //=================== Packaging block END==================//

                                //=================== Category block START==================//
                                // Walk the breadcrumb from the root down, creating any category that
                                // does not exist yet; the last breadcrumb entry is the product's category.
                                detailMain = doc.select("div.ui-breadcrumb div.container a");
                                Long productCategoryId = 0L;
                                String parentPath = "";
                                String thisCategory = detailMain.last().text().trim();
                                System.out.println("thisCategory == " + thisCategory);
                                Category parentCategory = new Category();
                                parentCategory.setId(0L);
                                parentCategory.setParentPath("");
                                for (Element element : detailMain) {
                                    String newCategory = element.text().trim();
                                    System.out.println("newCategory======" + newCategory);
                                    if (newCategory.equals("Home") || newCategory.equals("All Categories")) {
                                        continue;
                                    }
                                    Category category = categoryDao.getCategoryByName(newCategory);
                                    if (category != null) {
                                        if (category.getName().equals(thisCategory)) {
                                            productCategoryId = category.getId();
                                            parentPath = category.getParentPath();
                                        }
                                        parentCategory = category;
                                    } else {
                                        category = new Category();
                                        category.setId(null);
                                        category.setName(newCategory);
                                        category.setParentId(parentCategory.getId());
                                        category.setParentPath(parentCategory.getParentPath()
                                                + parentCategory.getId() + "=");
                                        category.setQuantity(0);
                                        category.setImgUrl("-");
                                        category.setDisplayText(newCategory);
                                        Category category2 = categoryDao.save(category);
                                        if (category.getName().equals(thisCategory)) {
                                            productCategoryId = category2.getId();
                                            parentPath = category2.getParentPath();
                                        }
                                        parentCategory = category2;
                                    }
                                }
                                //=================== Category block END==================//

                                //=============== Specifications block START==============//
                                // Serialised as "title:value," pairs; ',' and ':' inside a title or
                                // value are replaced so they cannot clash with the separators.
                                detailMain = doc.select(".product-property-list .property-item");
                                StringBuilder specificationsBuilder = new StringBuilder();
                                for (Element element : detailMain) {
                                    specificationsBuilder
                                            .append(element.select(".propery-title").get(0).text()
                                                    .replace(",", "/").replace(":", "-"))
                                            .append(":")
                                            .append(element.select(".propery-des").get(0).text()
                                                    .replace(",", "/").replace(":", "-"))
                                            .append(",");
                                }
                                String specifications = specificationsBuilder.toString();
                                //=============== Specifications Block END==============//

                                //=============== Shipping Time Block START==============//
                                // Only printed; the stored shipping time and rate below are fixed values.
                                detailMain = doc.select(".shipping-days[data-role='delivery-days']");
                                System.out.println("value detailMain" + detailMain.toString());
                                //=============== Shipping Time Block END==============//

                                //=================Product save 1st START==============//
                                testProduct.setCategoryId(productCategoryId);
                                testProduct.setLastUpdate(new Date());
                                testProduct.setParentPath(parentPath);
                                testProduct.setImgurl("-");
                                testProduct.setProperties("-");
                                testProduct.setProductWidth(width);
                                testProduct.setProductLength(length);
                                testProduct.setProductWeight(weight);
                                testProduct.setProductHeight(height);
                                testProduct.setShippingRate(0.0);
                                testProduct.setShippingTime("45");
                                testProduct.setSpecifications(specifications);
                                savedTestProduct = productDao.save(testProduct);
                                //====================Product save 1st END==============//

                                //========= Property, Property Value, Property Product Map Block START ========//
                                double discountPrice = 0.0;
                                double actualPrice = 0.0;
                                double markupPrice = 0.0;
                                String id = "";
                                String allProperties = "";
                                //------------------------Read Color css START---------------------//
                                specifics = doc.select("#j-product-info-sku dl.p-property-item");
                                Elements cssdetailMain = doc.select("link[href]");
                                Document cssdoc = new Document("");
                                System.out.println(
                                        "====================================================cssdetailMain"
                                                + cssdetailMain.size());
                                for (Element element : cssdetailMain) {
                                    String cssurl = element.attr("abs:href");
                                    if (cssurl.contains("??main-detail")) {
                                        try {
                                            cssdoc = Jsoup.connect(cssurl).get();
                                        } catch (IOException ex) {
                                            // Best effort: continue with the empty stylesheet document,
                                            // but leave a trace instead of swallowing the error.
                                            Logger.getLogger(ProductServiceImpl.class.getName()).log(
                                                    Level.WARNING, "Could not load colour stylesheet " + cssurl,
                                                    ex);
                                        }
                                        break;
                                    }
                                }
                                //-----------------------Read Color css END--------------------------//

                                //-----------Product Property, Property Value START--------//
                                // Saved property values keyed by the page's data-sku-id.
                                Map<String, ProductPropertyvalues> propertyValuesMap = new HashMap<>();
                                if (!specifics.isEmpty()) {
                                    ProductProperties testPorperties;
                                    ProductProperties saveTestPorperties;
                                    ProductPropertyvalues testPropertyValues;
                                    for (Element specific : specifics) {
                                        String propertyName = specific.select("dt").text();
                                        System.out.println("head  ==== " + propertyName);
                                        testPorperties = productPropertiesDao.loadByName(propertyName);
                                        if (testPorperties == null) {
                                            testPorperties = new ProductProperties();
                                            testPorperties.setPropertyName(propertyName);
                                            saveTestPorperties = productPropertiesDao.save(testPorperties);
                                        } else {
                                            saveTestPorperties = testPorperties;
                                        }
                                        allProperties = allProperties + saveTestPorperties.getId().toString()
                                                + "-";
                                        detailSub = specific.select("dd ul li");
                                        String valu = "-";
                                        for (Element element : detailSub) {
                                            testPropertyValues = new ProductPropertyvalues();
                                            id = element.select("a[data-sku-id]").attr("data-sku-id").trim();
                                            testPropertyValues.setRefId(id);
                                            if (element.hasClass("item-sku-image")) {
                                                // Cut everything after the first ".jpg"; the dot is escaped
                                                // so that e.g. "/jpg" in the path is not taken as the split point.
                                                valu = element.select("a img[src]").get(0).absUrl("src")
                                                        .split("\\.jpg")[0] + ".jpg";
                                                String title = element.select("a img").get(0).attr("title");
                                                String imgUrl = GoogleBucketFileUploader
                                                        .uploadProductImage(valu, savedTestProduct.getId());
                                                valu = "<img src='" + imgUrl + "' title='" + title
                                                        + "' style='height:40px; width:40px;'/>";
                                            } else if (element.hasClass("item-sku-color")) {
                                                // Take the declarations of the ".sku-color-<id>" rule from
                                                // the stylesheet text (between its '{' and '}').
                                                String style = cssdoc.html().split("sku-color-" + id)[1]
                                                        .split("}")[0].substring(1);
                                                // Size declarations stay inside the style attribute's quotes.
                                                valu = "<span style='" + style
                                                        + "; height:40px; width:40px; display:block;'></span>";
                                            } else {
                                                valu = element.select("a span").toString();
                                            }
                                            System.out.println("valu === " + valu);
                                            testPropertyValues.setProductId(savedTestProduct.getId());
                                            testPropertyValues.setPropertyId(saveTestPorperties.getId());
                                            testPropertyValues.setValueName(valu);
                                            propertyValuesMap.put(id,
                                                    productPropertyvaluesDao.save(testPropertyValues));
                                        }
                                    }
                                    savedTestProduct.setProperties(allProperties);
                                }
                                //-----------Product Property, Property Value END--------//

                                //----------------------Read json START------------------//
                                // The SKU list is embedded in a script as "var skuProducts=[...];"
                                // followed by "var GaData".
                                List<AxpProductDto> axpProductDtos = new ArrayList<>();
                                Elements scripts = doc.select("script");
                                for (Element script : scripts) {
                                    String scriptHtml = script.html();
                                    if (scriptHtml.contains("var skuProducts=")) {
                                        String jsonData = scriptHtml.split("var skuProducts=")[1]
                                                .split("var GaData")[0].trim();
                                        // Drop the trailing character (the statement terminator).
                                        jsonData = jsonData.substring(0, jsonData.length() - 1);
                                        Gson gsonObj = new Gson();
                                        axpProductDtos = Arrays
                                                .asList(gsonObj.fromJson(jsonData, AxpProductDto[].class));
                                        break;
                                    }
                                }
                                //----------------------Read json END------------------//

                                //-------------Product Properties Map START------------//
                                for (AxpProductDto thisAxpProductDto : axpProductDtos) {
                                    SkuVal skuVal = thisAxpProductDto.getSkuVal();
                                    if (skuVal.getActSkuCalPrice() != null) {
                                        value = skuVal.getActSkuCalPrice().trim();
                                        discountPrice = CurrencyConverter.usdTOinr(
                                                Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1")));
                                        value = skuVal.getSkuCalPrice().trim();
                                        actualPrice = CurrencyConverter.usdTOinr(
                                                Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1")));
                                        // Markup: 15% of the discounted price plus 100.
                                        markupPrice = discountPrice * 0.15 + 100;
                                        // Discounted price is rounded up to the next multiple of 10.
                                        discountPrice = Math.ceil((discountPrice + markupPrice) / 10) * 10;
                                        actualPrice = Math.round(actualPrice + markupPrice);
                                    } else {
                                        // No separate discounted price: both prices are the marked-up list price.
                                        value = skuVal.getSkuCalPrice().trim();
                                        actualPrice = CurrencyConverter.usdTOinr(
                                                Double.parseDouble(value.replaceAll(".*?([\\d.]+).*", "$1")));
                                        markupPrice = actualPrice * 0.15 + 100;
                                        discountPrice = Math.round(actualPrice + markupPrice);
                                        actualPrice = Math.round(actualPrice + markupPrice);
                                    }

                                    ProductPropertiesMap productPropertyMap = new ProductPropertiesMap();
                                    if (thisAxpProductDto.getSkuAttr() != null) {
                                        // Translate the page's sku ids into the ids of the saved values,
                                        // each followed by '_'.
                                        StringBuilder myPropValueIds = new StringBuilder();
                                        for (String skuPropId : thisAxpProductDto.getSkuPropIds().split(",")) {
                                            myPropValueIds
                                                    .append(propertyValuesMap.get(skuPropId).getId().toString())
                                                    .append("_");
                                        }
                                        productPropertyMap.setPropertyvalueComposition(myPropValueIds.toString());
                                    } else {
                                        productPropertyMap.setPropertyvalueComposition("_");
                                    }
                                    productPropertyMap.setDiscount(discountPrice);
                                    productPropertyMap.setPrice(actualPrice);
                                    productPropertyMap.setProductId(savedTestProduct);
                                    productPropertyMap.setQuantity(5L);
                                    productPropertiesMapDao.save(productPropertyMap);
                                }
                                //-------------Product Properties Map END------------//
                                //========= Property, Property Value, Property Product Map Block END ========//

                                //============= Multiple Image Block START =============//
                                // The first thumbnail becomes the product's main image; the others
                                // are stored as additional ProductImg rows.
                                detailMain = doc.select("ul.image-thumb-list span.img-thumb-item img[src]");
                                boolean mainImagePending = true;
                                for (Element element : detailMain) {
                                    String imgUrl = GoogleBucketFileUploader.uploadProductImage(
                                            element.absUrl("src").split("\\.jpg")[0] + ".jpg",
                                            savedTestProduct.getId());
                                    if (mainImagePending) {
                                        mainImagePending = false;
                                        savedTestProduct.setImgurl(imgUrl);
                                    } else {
                                        ProductImg productImg = new ProductImg();
                                        productImg.setId(null);
                                        productImg.setImgUrl(imgUrl);
                                        productImg.setProductId(savedTestProduct.getId());
                                        productImgDao.save(productImg);
                                    }
                                }
                                //============= Multiple Image Block END =============//

                                //=================Product save final START==============//
                                if (productDao.save(savedTestProduct) != null) {
                                    temtproductlinklist.setStatus(1);
                                    temtproductlinklistDao.save(temtproductlinklist);
                                    status = "Success";
                                }
                                //=================Product save final END==============//
                            } else {
                                temtproductlinklist.setStatus(2);
                                temtproductlinklistDao.save(temtproductlinklist);
                                status = "criteria mismatch";
                            }
                        } else {
                            status = "Page not found";
                        }
                    } catch (InterruptedException ex) {
                        // An interrupt is a request to stop, not a failure of this link: restore the
                        // flag and let the outer handler end the batch.
                        Thread.currentThread().interrupt();
                        throw ex;
                    } catch (Exception ex) {
                        System.out.println(
                                "=============================================================Exception1" + ex);
                        temtproductlinklist.setStatus(4);
                        temtproductlinklistDao.save(temtproductlinklist);
                        System.out.println("Exception === " + ex);
                        status = "Failure";
                        Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, "(==E==)DATE: "
                                + new Date().toString()
                                + "Store product details in temp product table get error in sub process.....\n Link Id: "
                                + statusBean.getId() + "\n Started on" + startDate, ex);
                    }
                } else {
                    temtproductlinklist.setStatus(3);
                    temtproductlinklistDao.save(temtproductlinklist);
                    status = "Product exsist";
                }
            }
            statusBean.setStatus(status);
        }
        System.out.println("=============================================================status" + status);
    } catch (Exception e) {
        System.out.println("=============================================================Exception2" + e);
        isSuccess = false;
        String body = "(==E==)DATE: " + new Date().toString()
                + "Store product details in temp product table get error.....<br/> Started on" + startDate
                + "<br/>";
        Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, body, e);
    }
    if (isSuccess) {
        String body = "(==I==)DATE: " + new Date().toString()
                + "Store product details in temp product table end.....<br/> Started on" + startDate;
        Logger.getLogger(ProductServiceImpl.class.getName()).log(Level.SEVERE, body);
    }
    System.out.println("=============================================================end");
}

From source file:com.salsaberries.narchiver.Trawler.java

/**
 * Extracts links from html, and returns a set of Pages with their parent
 * page already defined./*from   w  w w. j a  v  a2  s  .c  om*/
 *
 * @param html
 * @return A list of pages to follow.
 */
private ArrayList<Page> extractPages(Page extractPage) {

    String html = extractPage.getHtml();

    ArrayList<Page> pages = new ArrayList<>();

    // Are we at a stop at page?
    for (String e : stopAt) {
        if (extractPage.getTagURL().contains(e)) {
            return pages;
        }
    }

    // Parse the html
    Document doc = Jsoup.parse(html);
    Elements links = doc.getElementsByTag("a");

    for (Element link : links) {

        String tagURL = "";
        String linkText = "";
        boolean alreadyFollowed;
        boolean validURL = false;

        // First format the link
        if (link.attr("href").startsWith(baseURL)) {
            tagURL = link.attr("href").replace(baseURL, "");
            linkText = link.html();
            validURL = true;
        } else if (link.attr("href").startsWith("/")) {
            tagURL = link.attr("href");
            linkText = link.html();
            validURL = true;
        } else if (link.attr("href").startsWith("./")) {
            tagURL = link.attr("href").substring(1);
            linkText = link.html();
            validURL = true;
        }

        //else if (!link.attr("href").startsWith("/") && !link.attr("href").startsWith("http")) {
        //    tagURL = "/" + link.attr("href");
        //    linkText = link.html();
        //    validURL = true;
        //}
        // Has it already been followed?
        alreadyFollowed = trawledPages.contains(tagURL);

        // Does it violate the exclusion rules?
        boolean excluded = false;
        for (String e : exclude) {
            if (tagURL.contains(e)) {
                excluded = true;
            }
        }

        // Does it violate the exclusion equal rule?
        for (String e : excludeIfEqual) {
            if (tagURL.equals(e)) {
                excluded = true;
            }
        }

        if (!alreadyFollowed && validURL && !excluded) {
            logger.debug("Creating new page at URL " + tagURL);
            Page page = new Page(tagURL, extractPage, linkText);
            trawledPages.add(tagURL);
            pages.add(page);
        }

        if (alreadyFollowed) {
            logger.debug("Skipping duplicate at URL " + tagURL);
        }
        if (!validURL) {
            logger.debug("Invalid URL at " + link.attr("href"));
        }
        if (excluded) {
            logger.debug("Exclusion at " + link.attr("href"));
        }
    }
    return pages;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Performs one step of the multi-step reservation dialogue.
 *
 * <ul>
 * <li>{@code MultiStepResult.ACTION_CONFIRMATION}: posts the confirmation form and reports
 * {@code Status.OK}.</li>
 * <li>{@code selection == null} or {@code useraction == 0}: loads the reservation page, posts
 * the login form if the page contains an {@code AUSWEIS} input, and returns
 * {@code SELECTION_NEEDED} with the branch options when a branch select box is present.</li>
 * <li>{@code ReservationResult.ACTION_BRANCH}: posts the chosen branch.</li>
 * </ul>
 *
 * If the resulting page carries a {@code target} input with the value {@code makevorbest},
 * the details shown on it are returned with {@code CONFIRMATION_NEEDED}. Anything else is
 * reported as {@code Status.ERROR}, using the page's single {@code kontomeldung} element as the
 * message when there is exactly one.
 *
 * @param item       item whose reservation info is appended to the OPAC URL
 * @param acc        account whose name and password are sent if a login form appears
 * @param useraction identifier of the step being answered
 * @param selection  value chosen by the user in the previous step, may be {@code null}
 * @return the outcome of this step
 * @throws IOException declared for the HTTP helpers called here
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    final String reservationInfo = item.getReservation_info();

    // The confirmation step is self-contained: post and report success.
    if (useraction == MultiStepResult.ACTION_CONFIRMATION) {
        List<NameValuePair> confirmParams = new ArrayList<>(2);
        confirmParams.add(new BasicNameValuePair("make_allvl", "Bestaetigung"));
        confirmParams.add(new BasicNameValuePair("target", "makevorbest"));
        httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(confirmParams), getDefaultEncoding());
        return new ReservationResult(MultiStepResult.Status.OK);
    }

    Document page = null;

    if (selection == null || useraction == 0) {
        page = Jsoup.parse(httpGet(opac_url + "/" + reservationInfo, getDefaultEncoding()));

        if (!page.select("input[name=AUSWEIS]").isEmpty()) {
            // The page is a login form: submit the credentials and continue with the response.
            List<NameValuePair> loginParams = new ArrayList<>(2);
            loginParams.add(new BasicNameValuePair("AUSWEIS", acc.getName()));
            loginParams.add(new BasicNameValuePair("PWD", acc.getPassword()));
            if (data.has("db")) {
                try {
                    loginParams.add(new BasicNameValuePair("vkontodb", data.getString("db")));
                } catch (JSONException e) {
                    // The request is still sent, just without the "vkontodb" parameter.
                    e.printStackTrace();
                }
            }
            loginParams.add(new BasicNameValuePair("B1", "weiter"));
            loginParams.add(new BasicNameValuePair("target", page.select("input[name=target]").val()));
            loginParams.add(new BasicNameValuePair("type", "VT2"));
            page = Jsoup.parse(httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(loginParams),
                    getDefaultEncoding()));
        }

        // Switch to the "VZST" field name when the configured one is absent but "VZST" exists.
        if (page.select("select[name=" + branch_inputfield + "]").isEmpty()
                && !page.select("select[name=VZST]").isEmpty()) {
            branch_inputfield = "VZST";
        }

        if (!page.select("select[name=" + branch_inputfield + "]").isEmpty()) {
            List<Map<String, String>> branchOptions = new ArrayList<>();
            for (Element option : page.select("select[name=" + branch_inputfield + "]").first().children()) {
                String label = option.text().trim();
                // Options without a value attribute are keyed by their visible label.
                String key = option.hasAttr("value") ? option.attr("value") : label;
                Map<String, String> entry = new HashMap<>();
                entry.put("key", key);
                entry.put("value", label);
                branchOptions.add(entry);
            }
            // Stored for the follow-up ACTION_BRANCH request.
            _res_target = page.select("input[name=target]").attr("value");
            ReservationResult branchChoice = new ReservationResult(MultiStepResult.Status.SELECTION_NEEDED);
            branchChoice.setActionIdentifier(ReservationResult.ACTION_BRANCH);
            branchChoice.setSelection(branchOptions);
            return branchChoice;
        }
    } else if (useraction == ReservationResult.ACTION_BRANCH) {
        List<NameValuePair> branchParams = new ArrayList<>(2);
        branchParams.add(new BasicNameValuePair(branch_inputfield, selection));
        branchParams.add(new BasicNameValuePair("button2", "weiter"));
        branchParams.add(new BasicNameValuePair("target", _res_target));
        page = Jsoup.parse(httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(branchParams),
                getDefaultEncoding()));
    }

    // No branch above produced a page (unhandled useraction/selection combination).
    if (page == null) {
        return new ReservationResult(MultiStepResult.Status.ERROR);
    }

    if (!page.select("input[name=target]").isEmpty()
            && page.select("input[name=target]").attr("value").equals("makevorbest")) {
        ReservationResult confirmation = new ReservationResult(MultiStepResult.Status.CONFIRMATION_NEEDED);
        confirmation.setDetails(collectReservationDetails(page));
        return confirmation;
    }

    if (page.getElementsByClass("kontomeldung").size() == 1) {
        return new ReservationResult(MultiStepResult.Status.ERROR,
                page.getElementsByClass("kontomeldung").first().text());
    }

    return new ReservationResult(MultiStepResult.Status.ERROR,
            stringProvider.getString(StringProvider.UNKNOWN_ERROR));
}

/**
 * Gathers the detail rows of a confirmation page, in this order: the single
 * {@code kontomeldung} message, fee lines found in {@code .kontozeile_center}, the label of
 * the {@code #vorbest} button when it contains a parenthesis, and label/content pairs from
 * the table rows.
 */
private List<String[]> collectReservationDetails(Document page) {
    List<String[]> details = new ArrayList<>();

    if (page.getElementsByClass("kontomeldung").size() == 1) {
        details.add(new String[] { page.getElementsByClass("kontomeldung").first().text().trim() });
    }

    Pattern feePattern = Pattern.compile("geb.hr", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    for (Element block : page.select(".kontozeile_center")) {
        // <br> tags are swapped for a text marker so the lines survive the text() conversion.
        String flattened = Jsoup.parse(block.html().replaceAll("(?i)<br[^>]*>", "br2n")).text();
        for (String line : flattened.split("br2n")) {
            boolean isFeeLine = feePattern.matcher(line).find() && !line.contains("usstehend")
                    && line.contains("orbestellung");
            if (isFeeLine) {
                details.add(new String[] { line.trim() });
            }
        }
    }

    // Erlangen uses "Kostenpflichtige Vorbestellung (1 Euro)" as the label of its
    // reservation button.
    if (!page.select("#vorbest").isEmpty() && page.select("#vorbest").val().contains("(")) {
        details.add(new String[] { page.select("#vorbest").val().trim() });
    }

    for (Element row : page.select(".kontozeile_center table tr")) {
        boolean hasSinglePair = row.select(".konto_feld").size() == 1
                && row.select(".konto_feldinhalt").size() == 1;
        if (hasSinglePair) {
            details.add(new String[] { row.select(".konto_feld").text().trim(),
                    row.select(".konto_feldinhalt").text().trim() });
        }
    }

    return details;
}

From source file:me.vertretungsplan.parser.IndiwareParser.java

/**
 * Parses one server response and adds every day found in it to the schedule.
 *
 * A response containing {@code <html} or {@code <table} is parsed as HTML, anything else with
 * the XML parser. For HTML there are three cases: if an embedded-content selector is
 * configured, each matching element is parsed as a day; otherwise, if more than one
 * {@code .vpfuer} element exists, the markup is cut at each {@code <span class="vpfuer">} and
 * every piece after the first is parsed as a day; otherwise the whole document is one day.
 *
 * @param v        schedule that receives the parsed days
 * @param response raw response body
 * @throws JSONException declared for the configuration lookup
 * @throws IOException   if the configured selector matches no element
 */
void parseIndiwarePage(SubstitutionSchedule v, String response) throws JSONException, IOException {
    final boolean isHtml = response.contains("<html") || response.contains("<table");
    final Element root = isHtml ? Jsoup.parse(response) : Jsoup.parse(response, "", Parser.xmlParser());

    if (!isHtml) {
        v.addDay(parseIndiwareDay(root, false));
        return;
    }

    if (data.has(PARAM_EMBEDDED_CONTENT_SELECTOR)) {
        final String selector = data.getString(PARAM_EMBEDDED_CONTENT_SELECTOR);
        final Elements matches = root.select(selector);
        if (matches.isEmpty()) {
            throw new IOException("No elements found using " + selector);
        }
        for (Element match : matches) {
            v.addDay(parseIndiwareDay(match, true));
        }
        return;
    }

    if (root.select(".vpfuer").size() > 1) {
        // Several schedules follow each other on one page; the piece before the first
        // marker is skipped.
        final String[] pieces = root.html().split("<span class=\"vpfuer\">");
        int index = 1;
        while (index < pieces.length) {
            Document piece = Jsoup.parse(pieces[index]);
            v.addDay(parseIndiwareDay(piece, true));
            index++;
        }
        return;
    }

    v.addDay(parseIndiwareDay(root, true));
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule7.java

/**
 * Extracts persons from the {@code dd} entries that follow an "Authors/Editors" or
 * "Author/Editor" {@code dt} heading.
 *
 * Entries that sit under a different {@code dt} heading are skipped. An entry whose text
 * contains more than two {@code " - "} separators is handed over to {@link EditorsRule5} for
 * the whole document. Otherwise the entry's HTML is split on line breaks and each piece is
 * parsed as one person; links with an {@code @} in the href become the e-mail address, all
 * other non-empty hrefs become websites.
 *
 * @param url address of the specification, used in log output and passed on to EditorsRule5
 * @param doc parsed specification page
 * @return the persons found, or {@code null} if there are none
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    ArrayList<Person> editorList = new ArrayList<>();

    Elements entries = doc.select("dt:contains(Authors/Editors) ~ dd, dt:contains(Author/Editor) ~ dd");
    if (entries.isEmpty()) {
        return null;
    }

    boolean skip = false;
    for (Element editor : entries) {
        Element heading = editor.previousElementSibling();
        if (heading.tagName().equals("dt") && !startsWithEditorLabel(heading.text())) {
            skip = true;
        }

        if (skip) {
            // Stop skipping once the following sibling is an editor heading again.
            Element following = editor.nextElementSibling();
            if (following != null && startsWithEditorLabel(following.text())) {
                skip = false;
            }
            continue;
        }

        if (StringUtils.countMatches(editor.text(), " - ") > 2) {
            Log.log("warning", url + ": This editor may be a list of editors separated by  - ");
            return new EditorsRule5().run(url, doc);
        }

        String[] pieces = editor.html().split("<br />|<br clear=\"none\" />");

        if (pieces.length < 2) {
            String plain = editor.text();
            // These continues also bypass the trailing "next is dt" check below.
            if (plain.equals("WHATWG:") || plain.equals("W3C:")) {
                continue;
            }
            Person person = NameParser.parse(plain);
            if (person == null) {
                continue;
            }
            attachLinks(person, editor);
            editorList.add(person);
        } else {
            for (String piece : pieces) {
                if (piece.isEmpty() || piece.equals("WHATWG:") || piece.equals("W3C:")) {
                    continue;
                }
                Document fragment = Jsoup.parse(piece.replaceAll("\n", ""));
                Person person = NameParser.parse(fragment.text());
                if (person == null) {
                    continue;
                }
                attachLinks(person, fragment);
                editorList.add(person);
            }
        }

        // A following dt ends the current definition list section.
        Element following = editor.nextElementSibling();
        if (following != null && following.tag().getName().equals("dt")) {
            break;
        }
    }

    return editorList.isEmpty() ? null : editorList;
}

/** Tells whether the trimmed, lower-cased text begins with one of the two editor labels. */
private boolean startsWithEditorLabel(String text) {
    String lowered = text.trim().toLowerCase();
    return lowered.startsWith("authors/editors") || lowered.startsWith("author/editor");
}

/**
 * Copies the link targets found inside {@code scope} onto {@code person}: an href containing
 * {@code @} is stored as e-mail (with {@code mailto:} removed), any other non-empty href as a
 * website.
 */
private void attachLinks(Person person, Element scope) {
    for (Element anchor : scope.select("a")) {
        String href = anchor.attr("href");
        if (href.isEmpty()) {
            continue;
        }
        if (href.contains("@")) {
            person.setEmail(href.replace("mailto:", ""));
        } else {
            person.addWebsite(href);
        }
    }
}