Example usage of org.jsoup.nodes.Element.text()

List of usage examples for org.jsoup.nodes Element text

Introduction

This page collects usage examples of the org.jsoup.nodes.Element.text() method.

Prototype

public String text() 

Source Link

Document

Gets the combined text of this element and all its children.

Usage

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Collects the actors linked from the film's info panel.
 *
 * <p>The panel is read as two parallel lists, {@code .fm-minfo dt} (labels) and
 * {@code .fm-minfo dd} (values), matched by position. The first label equal to the actor key
 * decides the result: every {@code <a>} inside its value cell is resolved through
 * {@code createOrQueryActor}, and scanning stops there.
 *
 * @param doc parsed film detail page
 * @return the resolved actors in link order; empty (never {@code null}) when nothing matched
 */
private List<Actor> createOrGetActorList(Document doc) {
    List<Actor> actorList = Lists.newArrayList();

    Elements keyElements = doc.select(".fm-minfo dt");
    Elements valueElements = doc.select(".fm-minfo dd");
    if (CollectionUtils.isEmpty(keyElements) || CollectionUtils.isEmpty(valueElements)) {
        return actorList;
    }

    // Labels and values are paired by index, so only walk the positions both lists have;
    // indexing the values by the label count throws when the page has fewer <dd> than <dt>.
    int pairCount = Math.min(keyElements.size(), valueElements.size());
    for (int i = 0; i < pairCount; i++) {
        Element keyElement = keyElements.get(i);
        Element valueElement = valueElements.get(i);
        if (null == keyElement || null == valueElement) {
            continue;
        }

        String key = StringUtils.trimToEmpty(keyElement.text());
        if (StringUtils.isBlank(key)) {
            continue;
        }

        // NOTE(review): the label literal below is empty in this copy of the source, so after
        // the blank check above this comparison can never succeed. Presumably non-ASCII text
        // was lost in transit - restore the literal from the upstream file.
        if (!StringUtils.equalsIgnoreCase(key, "")) {
            continue;
        }

        for (Element actorNameElement : valueElement.select("a")) {
            String actorName = StringUtils.trimToEmpty(actorNameElement.text());
            if (StringUtils.isBlank(actorName)) {
                continue;
            }
            Actor actor = createOrQueryActor(actorName);
            if (null != actor) {
                actorList.add(actor);
            }
        }

        // Only the first matching label is considered.
        break;
    }

    return actorList;
}

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Links the film to every known region named in its info panel.
 *
 * <p>The panel is read as two parallel lists, {@code .fm-minfo dt} (labels) and
 * {@code .fm-minfo dd} (values), matched by position. For the first label equal to the region
 * key with a non-blank value, the value is split with {@code SLASH_SPLITTER}; each part is looked
 * up through {@code enumRegionMapper} and, when found, a {@code FilmRegion} row is inserted.
 *
 * @param doc  parsed film detail page
 * @param film film whose code is written to each inserted {@code FilmRegion}
 */
private void addFilmRegionList(Document doc, Film film) {
    Elements keyElements = doc.select(".fm-minfo dt");
    Elements valueElements = doc.select(".fm-minfo dd");
    if (CollectionUtils.isEmpty(keyElements) || CollectionUtils.isEmpty(valueElements)) {
        return;
    }

    // Labels and values are paired by index, so only walk the positions both lists have;
    // indexing the values by the label count throws when the page has fewer <dd> than <dt>.
    int pairCount = Math.min(keyElements.size(), valueElements.size());
    for (int i = 0; i < pairCount; i++) {
        Element keyElement = keyElements.get(i);
        Element valueElement = valueElements.get(i);
        if (null == keyElement || null == valueElement) {
            continue;
        }

        String key = StringUtils.trimToEmpty(keyElement.text());
        if (StringUtils.isBlank(key)) {
            continue;
        }
        String value = StringUtils.trimToEmpty(valueElement.text());

        // NOTE(review): the label literal below is empty in this copy of the source, so after
        // the blank check above this comparison can never succeed. Presumably non-ASCII text
        // was lost in transit - restore the literal from the upstream file.
        if (!StringUtils.equalsIgnoreCase(key, "") || StringUtils.isBlank(value)) {
            continue;
        }

        List<String> regionList = SLASH_SPLITTER.splitToList(value);
        if (CollectionUtils.isNotEmpty(regionList)) {
            for (String region : regionList) {
                EnumRegion queryRegion = new EnumRegion();
                queryRegion.setUrlRegion(region);
                EnumRegion enumRegion = enumRegionMapper.queryEnumRegionByEnumRegion(queryRegion);
                if (null == enumRegion) {
                    // Unknown region names are skipped rather than created.
                    continue;
                }

                FilmRegion filmRegion = new FilmRegion();
                filmRegion.setFilmCode(film.getCode());
                filmRegion.setRegionId(enumRegion.getId());

                Date now = new Date();
                filmRegion.setCreateTime(now);
                filmRegion.setUpdateTime(now);
                filmRegionMapper.insertSelective(filmRegion);
            }
        }

        // Only the first matching label is considered.
        break;
    }
}

From source file:biz.shadowservices.DegreesToolbox.DataFetcher.java

/**
 * Refreshes the locally cached account balances from the 2degrees self-service website.
 *
 * <p>Unless {@code force} is set, the update is skipped when the stored preferences or the
 * current connection type disallow it. Otherwise the method logs in with the stored credentials,
 * empties the {@code cache} table, stores one row per line of the account-summary table,
 * annotates prepay value packs, and records the update time in the shared preferences.
 *
 * <p>The database handle is closed on every exit path.
 *
 * @param context Android context used for preferences, connectivity checks and logging
 * @param force   {@code true} to bypass the preference and connection-type checks; the
 *                connectivity and credential checks still apply
 * @return the outcome of the update, or {@code null} when the login page request yields no body
 */
public FetchResult updateData(Context context, boolean force) {
    //Open database
    DBOpenHelper dbhelper = new DBOpenHelper(context);
    SQLiteDatabase db = dbhelper.getWritableDatabase();

    // Everything after the open runs inside this try so that the early "skip" returns
    // release the database handle too.
    try {
        // check for internet connectivity
        try {
            if (!isOnline(context)) {
                Log.d(TAG, "We do not seem to be online. Skipping Update.");
                return FetchResult.NOTONLINE;
            }
        } catch (Exception e) {
            exceptionReporter.reportException(Thread.currentThread(), e, "Exception during isOnline()");
        }

        SharedPreferences sp = PreferenceManager.getDefaultSharedPreferences(context);
        if (force) {
            Log.d(TAG, "Update Forced");
        } else {
            FetchResult skipReason = findSkipReason(context, sp);
            if (skipReason != null) {
                return skipReason;
            }
        }

        return fetchAndCacheAccountData(context, sp, db);
    } catch (IOException e) {
        // ClientProtocolException is an IOException, so this single handler covers both.
        DBLog.insertMessage(context, "w", TAG, "Network error: " + e.getMessage());
        return FetchResult.NETWORKERROR;
    } finally {
        db.close();
    }
}

/**
 * Decides whether a non-forced update must be skipped.
 *
 * @return the result to hand back to the caller, or {@code null} when the update may proceed
 *         (including when one of the checks itself failed with an exception, which is reported)
 */
private FetchResult findSkipReason(Context context, SharedPreferences sp) {
    try {
        if (sp.getBoolean("loginFailed", false)) {
            Log.d(TAG, "Previous login failed. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Previous login failed. Skipping Update.");
            return FetchResult.LOGINFAILED;
        }
        if (!sp.getBoolean("autoupdates", true)) {
            Log.d(TAG, "Automatic updates not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Automatic updates not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }
        if (!isBackgroundDataEnabled(context) && sp.getBoolean("obeyBackgroundData", true)) {
            Log.d(TAG, "Background data not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Background data not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }
        if (!isAutoSyncEnabled() && sp.getBoolean("obeyAutoSync", true)
                && sp.getBoolean("obeyBackgroundData", true)) {
            Log.d(TAG, "Auto sync not enabled. Skipping Update.");
            DBLog.insertMessage(context, "i", TAG, "Auto sync not enabled. Skipping Update.");
            return FetchResult.NOTALLOWED;
        }
        if (isWifi(context) && !sp.getBoolean("wifiUpdates", true)) {
            Log.d(TAG, "On wifi, and wifi auto updates not allowed. Skipping Update");
            DBLog.insertMessage(context, "i", TAG,
                    "On wifi, and wifi auto updates not allowed. Skipping Update");
            return FetchResult.NOTALLOWED;
        } else if (!isWifi(context)) {
            Log.d(TAG, "We are not on wifi.");
            if (!isRoaming(context) && !sp.getBoolean("2DData", true)) {
                Log.d(TAG, "Automatic updates on 2Degrees data not enabled. Skipping Update.");
                DBLog.insertMessage(context, "i", TAG,
                        "Automatic updates on 2Degrees data not enabled. Skipping Update.");
                return FetchResult.NOTALLOWED;
            } else if (isRoaming(context) && !sp.getBoolean("roamingData", false)) {
                Log.d(TAG, "Automatic updates on roaming mobile data not enabled. Skipping Update.");
                DBLog.insertMessage(context, "i", TAG,
                        "Automatic updates on roaming mobile data not enabled. Skipping Update.");
                return FetchResult.NOTALLOWED;
            }
        }
    } catch (Exception e) {
        // A failing check must not block the update; report it and carry on.
        exceptionReporter.reportException(Thread.currentThread(), e,
                "Exception while finding if to update.");
    }
    return null;
}

/**
 * Logs in, scrapes the account summary (and, for prepay, the value packs) and rewrites the
 * {@code cache} table from it.
 *
 * @return the outcome, or {@code null} when the login page request yields no body
 * @throws IOException when one of the HTTP requests fails
 */
private FetchResult fetchAndCacheAccountData(Context context, SharedPreferences sp, SQLiteDatabase db)
        throws IOException {
    String username = sp.getString("username", null);
    String password = sp.getString("password", null);
    if (username == null || password == null) {
        DBLog.insertMessage(context, "i", TAG, "Username or password not set.");
        return FetchResult.USERNAMEPASSWORDNOTSET;
    }

    // Find the URL of the page to send login data to.
    Log.d(TAG, "Finding Action. ");
    HttpGetter loginPageGet = new HttpGetter("https://secure.2degreesmobile.co.nz/web/ip/login");
    String loginPageString = loginPageGet.execute();
    if (loginPageString == null) {
        return null;
    }

    Document loginPage = Jsoup.parse(loginPageString,
            "https://secure.2degreesmobile.co.nz/web/ip/login");
    // NOTE(review): first() returns null when the page has no element named "loginFrm";
    // that case is not handled here and surfaces as a NullPointerException.
    Element loginForm = loginPage.getElementsByAttributeValue("name", "loginFrm").first();
    String loginAction = loginForm.attr("action");

    // Send login form
    List<NameValuePair> loginValues = new ArrayList<NameValuePair>();
    loginValues.add(new BasicNameValuePair("externalURLRedirect", ""));
    loginValues.add(new BasicNameValuePair("hdnAction", "login_userlogin"));
    loginValues.add(new BasicNameValuePair("hdnAuthenticationType", "M"));
    loginValues.add(new BasicNameValuePair("hdnlocale", ""));
    loginValues.add(new BasicNameValuePair("userid", username));
    loginValues.add(new BasicNameValuePair("password", password));
    Log.d(TAG, "Sending Login ");
    HttpPoster sendLoginPoster = new HttpPoster(loginAction, loginValues);

    // Parse result
    String loginResponse = sendLoginPoster.execute();
    Document loginResponseParsed = Jsoup.parse(loginResponse);

    // Determine if this is a pre-pay or post-paid account.
    boolean postPaid = loginResponseParsed
            .getElementById("p_CustomerPortalPostPaidHomePage_WAR_customerportalhomepage") != null;
    if (postPaid) {
        Log.d(TAG, "Post-paid account.");
    } else {
        Log.d(TAG, "Pre-pay account or no account.");
    }

    String homepageUrl = postPaid
            ? "https://secure.2degreesmobile.co.nz/group/ip/postpaid"
            : "https://secure.2degreesmobile.co.nz/group/ip/home";
    HttpGetter homepageGetter = new HttpGetter(homepageUrl);
    String homepageHTML = homepageGetter.execute();
    Document homePage = Jsoup.parse(homepageHTML);

    Element accountSummary = homePage.getElementById("accountSummary");
    if (accountSummary == null) {
        Log.d(TAG, "Login failed.");
        return FetchResult.LOGINFAILED;
    }
    db.delete("cache", "", null);

    // The post-paid bill summary table ("tableBillSummary") is deliberately not cached; an
    // earlier implementation stored it, but the extra details were judged not useful.

    // The wording of the expiry note differs between prepay and post-paid pages; the pattern
    // does not change between rows, so build it once.
    Pattern expiryPattern = postPaid
            ? Pattern.compile("\\(([\\d\\.]*) ?\\w*? ?will expire on (.*)\\)")
            : Pattern.compile("\\(([\\d\\.]*) ?\\w*? ?expiring on (.*)\\)");

    Element accountSummaryTable = accountSummary.getElementsByClass("tableAccountSummary").first();
    Elements rows = accountSummaryTable.getElementsByTag("tr");
    for (Element row : rows) {
        cacheAccountSummaryRow(db, row, expiryPattern);
    }

    if (!postPaid) {
        cacheValuePackDetails(context, db);
    }

    SharedPreferences.Editor prefedit = sp.edit();
    Date now = new Date();
    prefedit.putString("updateDate", DateFormatters.ISO8601FORMAT.format(now));
    prefedit.putBoolean("loginFailed", false);
    prefedit.putBoolean("networkError", false);
    prefedit.commit();
    DBLog.insertMessage(context, "i", TAG, "Update Successful");
    return FetchResult.SUCCESS;
}

/**
 * Parses one row of the account-summary table and inserts it into the {@code cache} table.
 *
 * @param expiryPattern pattern whose group 1 is the expiring amount and group 2 the expiry date
 */
private void cacheAccountSummaryRow(SQLiteDatabase db, Element row, Pattern expiryPattern) {
    // A row without an amount cell is stored with null value and units.
    Double value = null;
    String units = null;
    Element amount = row.getElementsByClass("tableBillamount").first();
    if (amount != null) {
        // The cell reads "<amount>&nbsp;<units>".
        String[] amountParts = amount.html().split("&nbsp;", 2);
        if (amountParts[0].contains("Included") || amountParts[0].equals("All You Need")
                || amountParts[0].equals("Unlimited Text*")) {
            value = Values.INCLUDED;
        } else {
            try {
                value = Double.parseDouble(amountParts[0]);
            } catch (NumberFormatException e) {
                exceptionReporter.reportException(Thread.currentThread(), e, "Decoding value.");
                value = 0.0;
            }
        }
        // An amount without the "&nbsp;" separator has no units part.
        units = amountParts.length > 1 ? amountParts[1] : null;
    }

    // NOTE(review): assumes every row has a "tableBilldetail" cell containing a <strong>;
    // a row without one throws NullPointerException here.
    Element details = row.getElementsByClass("tableBilldetail").first();
    String name = details.getElementsByTag("strong").first().text();
    Element expires = details.getElementsByTag("em").first();
    String expiresDetails = expires != null ? expires.text() : "";
    Log.d(TAG, expiresDetails);

    Double expiresValue = null;
    String expiresDate = null;
    Matcher matcher = expiryPattern.matcher(expiresDetails);
    if (matcher.find()) {
        try {
            expiresValue = Double.parseDouble(matcher.group(1));
        } catch (NumberFormatException e) {
            // The amount group may be empty; the date alone is still useful.
            expiresValue = null;
        }
        String expiresDateString = matcher.group(2);
        if (expiresDateString != null && expiresDateString.length() > 0) {
            try {
                Date expiresDateObj = DateFormatters.EXPIRESDATE.parse(expiresDateString);
                expiresDate = DateFormatters.ISO8601DATEONLYFORMAT.format(expiresDateObj);
            } catch (java.text.ParseException e) {
                Log.d(TAG, "Could not parse date: " + expiresDateString);
            }
        }
    }

    ContentValues values = new ContentValues();
    values.put("name", name);
    values.put("value", value);
    values.put("units", units);
    values.put("expires_value", expiresValue);
    values.put("expires_date", expiresDate);
    db.insert("cache", "value", values);
}

/**
 * Fetches the prepay value-pack page and records, for every enabled pack that is known in
 * {@code Values.valuePacks}, its starting amount and plan name on the matching cache rows.
 *
 * @throws IOException when the HTTP request fails
 */
private void cacheValuePackDetails(Context context, SQLiteDatabase db) throws IOException {
    Log.d(TAG, "Getting Value packs...");
    // Find value packs
    HttpGetter valuePacksPageGet = new HttpGetter(
            "https://secure.2degreesmobile.co.nz/group/ip/prevaluepack");
    String valuePacksPageString = valuePacksPageGet.execute();
    if (valuePacksPageString == null) {
        return;
    }

    Document valuePacksPage = Jsoup.parse(valuePacksPageString);
    Elements enabledPacks = valuePacksPage.getElementsByClass("yellow");
    for (Element enabledPack : enabledPacks) {
        Element offerNameElemt = enabledPack
                .getElementsByAttributeValueStarting("name", "offername").first();
        if (offerNameElemt == null) {
            continue;
        }
        String offerName = offerNameElemt.val();
        DBLog.insertMessage(context, "d", "", "Got element: " + offerName);
        ValuePack[] packs = Values.valuePacks.get(offerName);
        if (packs == null) {
            DBLog.insertMessage(context, "d", "", "Offer name: " + offerName + " not matched.");
            continue;
        }
        for (ValuePack pack : packs) {
            ContentValues values = new ContentValues();
            values.put("plan_startamount", pack.value);
            values.put("plan_name", offerName);
            DBLog.insertMessage(context, "d", "",
                    "Pack " + pack.type.id + " start value set to " + pack.value);
            // Bind the id instead of splicing it into the SQL text.
            db.update("cache", values, "name = ?", new String[] { String.valueOf(pack.type.id) });
        }
    }
}

From source file:de.geeksfactory.opacclient.apis.Zones22.java

/**
 * Performs one step of the multi-step reservation dialogue for {@code item}.
 *
 * <p>The reservation page is fetched first; if it asks for the PIN ("Geheimnummer"), the login
 * form is submitted with the account credentials. Then:
 * <ul>
 *   <li>{@code useraction == ReservationResult.ACTION_BRANCH}: the form is submitted with
 *       {@code selection} as pickup location and {@code OK} is returned;</li>
 *   <li>{@code useraction == 0}: if the form contains a text mentioning a fee ("Entgelt"), a
 *       {@code CONFIRMATION_NEEDED} result carrying that text is returned;</li>
 *   <li>if the form contains a {@code <select>}, a {@code SELECTION_NEEDED} result with its
 *       options is returned;</li>
 *   <li>otherwise an {@code ERROR} result is returned.</li>
 * </ul>
 *
 * @param item       item to reserve; its reservation info is appended to the OPAC URL
 * @param acc        account whose name and password are sent when the PIN is requested
 * @param useraction step identifier chosen by the caller ({@code 0} for the initial call)
 * @param selection  branch value chosen by the user, used for {@code ACTION_BRANCH}
 * @return the result of this step
 * @throws IOException when an HTTP request fails
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    String reservation_info = item.getReservation_info();
    String html = httpGet(opac_url + "/" + reservation_info, getDefaultEncoding());
    Document doc = Jsoup.parse(html);
    if (html.contains("Geheimnummer")) {
        // Login required: resend the form with our own credentials in place of BRWR/PIN.
        List<NameValuePair> params = collectMainFormInputs(doc, "BRWR", "PIN");
        params.add(new BasicNameValuePair("BRWR", acc.getName()));
        params.add(new BasicNameValuePair("PIN", acc.getPassword()));
        html = httpGet(opac_url + "/" + doc.select("#MainForm").attr("action") + "?"
                + URLEncodedUtils.format(params, getDefaultEncoding()), getDefaultEncoding());
        doc = Jsoup.parse(html);
    }

    if (useraction == ReservationResult.ACTION_BRANCH) {
        List<NameValuePair> params = collectMainFormInputs(doc, "Confirm");
        params.add(new BasicNameValuePair("MakeResTypeDef.Reservation.RecipientLocn", selection));
        params.add(new BasicNameValuePair("Confirm", "1"));
        // The response body is not inspected; the request itself places the reservation.
        httpGet(opac_url + "/" + doc.select("#MainForm").attr("action") + "?"
                + URLEncodedUtils.format(params, getDefaultEncoding()), getDefaultEncoding());
        return new ReservationResult(MultiStepResult.Status.OK);
    }

    if (useraction == 0) {
        ReservationResult res = null;
        // first() is null when the page has no #MainForm; fall through to the checks below
        // instead of failing with a NullPointerException.
        Element mainForm = doc.select("#MainForm").first();
        if (mainForm != null) {
            for (Node n : mainForm.childNodes()) {
                if (!(n instanceof TextNode)) {
                    continue;
                }
                String text = ((TextNode) n).text();
                if (text.contains("Entgelt")) {
                    // If several text nodes mention the fee, the last one wins.
                    res = new ReservationResult(ReservationResult.Status.CONFIRMATION_NEEDED);
                    List<String[]> details = new ArrayList<String[]>();
                    details.add(new String[] { text.trim() });
                    res.setDetails(details);
                    res.setMessage(text.trim());
                    res.setActionIdentifier(MultiStepResult.ACTION_CONFIRMATION);
                }
            }
        }
        if (res != null) {
            return res;
        }
    }

    if (!doc.select("#MainForm select").isEmpty()) {
        ReservationResult res = new ReservationResult(ReservationResult.Status.SELECTION_NEEDED);
        Map<String, String> sel = new HashMap<String, String>();
        for (Element opt : doc.select("#MainForm select option")) {
            sel.put(opt.attr("value"), opt.text().trim());
        }
        res.setSelection(sel);
        res.setMessage("Bitte Zweigstelle ausw\u00e4hlen");
        res.setActionIdentifier(ReservationResult.ACTION_BRANCH);
        return res;
    }

    return new ReservationResult(ReservationResult.Status.ERROR);
}

/**
 * Copies the name/value pairs of all {@code #MainForm input} elements, leaving out the inputs
 * whose name is listed in {@code skippedNames}.
 */
private List<NameValuePair> collectMainFormInputs(Document doc, String... skippedNames) {
    List<NameValuePair> params = new ArrayList<NameValuePair>();
    for (Element input : doc.select("#MainForm input")) {
        String name = input.attr("name");
        boolean skipped = false;
        for (String skippedName : skippedNames) {
            if (name.equals(skippedName)) {
                skipped = true;
                break;
            }
        }
        if (!skipped) {
            params.add(new BasicNameValuePair(name, input.attr("value")));
        }
    }
    return params;
}

From source file:com.github.binlee1990.spider.movie.spider.MovieCrawler.java

/**
 * Fills director, year, release date, length and alias of {@code film} from the info panel.
 *
 * <p>The panel is read as two parallel lists, {@code .fm-minfo dt} (labels) and
 * {@code .fm-minfo dd} (values), matched by position. Every pair is compared against each
 * known label; there is no early exit, so all pairs are visited.
 *
 * @param doc  parsed film detail page
 * @param film film to update in place
 */
private void setFilmRelated(Document doc, Film film) {
    Elements keyElements = doc.select(".fm-minfo dt");
    Elements valueElements = doc.select(".fm-minfo dd");
    if (CollectionUtils.isEmpty(keyElements) || CollectionUtils.isEmpty(valueElements)) {
        return;
    }

    // Labels and values are paired by index, so only walk the positions both lists have;
    // indexing the values by the label count throws when the page has fewer <dd> than <dt>.
    int pairCount = Math.min(keyElements.size(), valueElements.size());
    for (int i = 0; i < pairCount; i++) {
        Element keyElement = keyElements.get(i);
        Element valueElement = valueElements.get(i);
        if (null == keyElement || null == valueElement) {
            continue;
        }

        String key = StringUtils.trimToEmpty(keyElement.text());
        if (StringUtils.isBlank(key)) {
            continue;
        }
        String value = StringUtils.trimToEmpty(valueElement.text());

        // NOTE(review): most label literals below are empty (or "??") in this copy of the
        // source; after the blank check above an empty literal can never match. Presumably
        // non-ASCII text was lost in transit - restore the literals from the upstream file.

        // Director: link to the director record when one exists, and always keep the raw name.
        if (StringUtils.equalsIgnoreCase(key, "")) {
            Director director = createOrQueryDirector(value);
            if (null != director) {
                film.setDirectorId(director.getId());
            }
            film.setDirector(value);
        }
        // Two labels are recognised but deliberately have no handling.
        if (StringUtils.equalsIgnoreCase(key, "")) {
        }
        if (StringUtils.equalsIgnoreCase(key, "")) {
        }
        // Release information: year id (when the year is known) and release date.
        if (StringUtils.equalsIgnoreCase(key, "")) {
            String urlYear = getFilmUrlYear(doc, value);
            if (StringUtils.isNotBlank(urlYear)) {
                EnumYear enumYear = queryEnumYear(urlYear);
                if (null != enumYear) {
                    film.setYearId(enumYear.getId());
                }
            }

            Date releaseDate = getFilmReleaseDate(value);
            if (null != releaseDate) {
                film.setReleaseDate(releaseDate);
            }
        }
        // Running length.
        if (StringUtils.equalsIgnoreCase(key, "")) {
            int length = getFilmLength(value);
            film.setLength(length);
        }
        // Alias, only when a value is present.
        if (StringUtils.equalsIgnoreCase(key, "??")) {
            if (StringUtils.isNotBlank(value)) {
                film.setAlias(value);
            }
        }
    }
}

From source file:de.geeksfactory.opacclient.apis.Heidi.java

/**
 * Builds the list of search fields offered by this OPAC.
 *
 * <p>The result contains, in order: one text field per option of the {@code kat1} select on the
 * search page, a branch dropdown built from {@code #teilk2}, a home-branch dropdown built from
 * the {@code zweigstelle.cgi} page (left out when that page cannot be fetched), and an
 * invisible text field {@code _heidi_page}.
 *
 * @return the search fields, never {@code null}
 * @throws IOException        when the search page cannot be fetched
 * @throws OpacErrorException declared by the overridden method
 * @throws JSONException      declared by the overridden method
 */
@Override
public List<SearchField> getSearchFields() throws IOException, OpacErrorException, JSONException {
    String html = httpGet(opac_url + "/search.cgi?art=f", ENCODING, false, cookieStore);
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);
    List<SearchField> fields = new ArrayList<>();

    // Free-text search categories.
    for (Element option : doc.select("select[name=kat1] option")) {
        TextSearchField textField = new TextSearchField();
        textField.setDisplayName(option.text());
        textField.setId(option.attr("value"));
        textField.setHint("");
        fields.add(textField);
    }

    // Branch filter; options with an empty value are skipped.
    DropdownSearchField branchField = new DropdownSearchField();
    for (Element opt : doc.select("#teilk2 option")) {
        if (!opt.val().isEmpty()) {
            branchField.addDropdownValue(opt.val(), opt.text());
        }
    }
    branchField.setDisplayName("Einrichtung");
    branchField.setId("f[teil2]");
    branchField.setVisible(true);
    branchField.setMeaning(SearchField.Meaning.BRANCH);
    fields.add(branchField);

    try {
        // Home branch; options with an empty value are skipped.
        DropdownSearchField homeBranchField = new DropdownSearchField();
        Document doc2 = Jsoup
                .parse(httpGet(opac_url + "/zweigstelle.cgi?sess=" + sessid, ENCODING, false, cookieStore));
        for (Element opt : doc2.select("#zweig option")) {
            if (!opt.val().isEmpty()) {
                homeBranchField.addDropdownValue(opt.val(), opt.text());
            }
        }
        homeBranchField.setDisplayName("Leihstelle");
        homeBranchField.setId("_heidi_branch");
        homeBranchField.setVisible(true);
        homeBranchField.setMeaning(SearchField.Meaning.HOME_BRANCH);
        fields.add(homeBranchField);
    } catch (IOException e) {
        // Best effort: without the branch page the remaining fields are still returned.
        e.printStackTrace();
    }

    TextSearchField pagefield = new TextSearchField();
    pagefield.setId("_heidi_page");
    pagefield.setVisible(false);
    pagefield.setDisplayName("Seite");
    pagefield.setHint("");
    fields.add(pagefield);

    return fields;
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Parses the content for one day starting at {@code next} and adds the day to {@code v}.
 *
 * <p>If {@code next} is itself the substitution table (class {@code subst}, contains a
 * {@code .list} element, or carries the "Keine Vertretungen" notice) it is parsed directly.
 * Otherwise {@code next} is treated as the message block and the table is expected two element
 * siblings further on. When the page states that substitutions are not released, nothing is
 * parsed and the day is not added.
 *
 * @param day    day object to fill
 * @param next   element at which this day's content starts
 * @param v      schedule receiving the parsed day
 * @param klasse class name passed through to the table parser
 */
void parseDay(SubstitutionScheduleDay day, Element next, SubstitutionSchedule v, String klasse)
        throws JSONException, CredentialInvalidException {
    String text = next.text();

    // Substitutions not released: leave the schedule untouched.
    if (text.contains("Vertretungen sind nicht freigegeben")) {
        return;
    }

    boolean startsWithTable = next.className().equals("subst")
            || next.select(".list").size() > 0
            || text.contains("Keine Vertretungen");

    Element table;
    if (startsWithTable) {
        // Substitution table
        table = next;
    } else {
        // Messages first, then the table two siblings further on
        parseMessages(next, day);
        table = next.nextElementSibling().nextElementSibling();
    }
    parseSubstitutionScheduleTable(table, scheduleData.getData(), day, klasse);

    v.addDay(day);
}

From source file:de.geeksfactory.opacclient.apis.Pica.java

/**
 * Builds the list of search fields by scraping the catalogue's {@code ADVANCED_SEARCHFILTER} page.
 *
 * <p>The following form controls are turned into fields, in this order:
 * <ul>
 *   <li>every option of {@code select[name=IKT0]} as a text field; a bracketed or parenthesised
 *       2-3 letter code in the label (optionally prefixed with {@code X} and/or suffixed with
 *       {@code :}) is stripped from the display name and stored, upper-cased and including its
 *       brackets, under the {@code meaning} key of the field data,</li>
 *   <li>{@code select[name=SRT]} as a dropdown with id {@code SRT},</li>
 *   <li>text inputs and selects whose name starts with {@code ADI},</li>
 *   <li>the {@code FUZZY} input as a checkbox,</li>
 *   <li>the {@code ADI_MAT} inputs merged into one dropdown with a leading "Alle" entry.</li>
 * </ul>
 *
 * @return the search fields found on the page
 * @throws IOException   if the page cannot be fetched
 * @throws JSONException if the per-field JSON data cannot be built
 */
@Override
public List<SearchField> getSearchFields() throws IOException, JSONException {
    if (!initialised) {
        start();
    }

    String html = httpGet(opac_url + "/LNG=" + getLang() + "/DB=" + db + "/ADVANCED_SEARCHFILTER",
            getDefaultEncoding());
    Document doc = Jsoup.parse(html);
    List<SearchField> fields = new ArrayList<>();

    // Loop-invariant, so compile it once instead of once per <option>.
    Pattern meaningPattern = Pattern.compile("\\[X?[A-Za-z]{2,3}:?\\]|\\(X?[A-Za-z]{2,3}:?\\)");

    Elements options = doc.select("select[name=IKT0] option");
    for (Element option : options) {
        TextSearchField field = new TextSearchField();
        field.setDisplayName(option.text());
        field.setId(option.attr("value"));
        field.setHint("");
        field.setData(new JSONObject("{\"ADI\": false}"));

        Matcher matcher = meaningPattern.matcher(field.getDisplayName());
        if (matcher.find()) {
            // Locale.ROOT keeps the code stable under locales with special casing rules
            // (e.g. Turkish, where "i" would otherwise become a dotted capital I).
            field.getData().put("meaning",
                    matcher.group().replace(":", "").toUpperCase(java.util.Locale.ROOT));
            field.setDisplayName(matcher.replaceFirst("").trim());
        }

        fields.add(field);
    }

    Elements sort = doc.select("select[name=SRT]");
    if (sort.size() > 0) {
        DropdownSearchField field = new DropdownSearchField();
        // The label is looked up as the ".longval" element under the select's grandparent.
        field.setDisplayName(sort.first().parent().parent().select(".longval").first().text());
        field.setId("SRT");
        for (Element option : sort.select("option")) {
            field.addDropdownValue(option.attr("value"), option.text());
        }
        fields.add(field);
    }

    for (Element input : doc.select("input[type=text][name^=ADI]")) {
        TextSearchField field = new TextSearchField();
        field.setDisplayName(input.parent().parent().select(".longkey").text());
        field.setId(input.attr("name"));
        field.setHint(input.parent().select("span").text());
        field.setData(new JSONObject("{\"ADI\": true}"));
        fields.add(field);
    }

    for (Element dropdown : doc.select("select[name^=ADI]")) {
        DropdownSearchField field = new DropdownSearchField();
        field.setDisplayName(dropdown.parent().parent().select(".longkey").text());
        field.setId(dropdown.attr("name"));
        for (Element option : dropdown.select("option")) {
            field.addDropdownValue(option.attr("value"), option.text());
        }
        fields.add(field);
    }

    Elements fuzzy = doc.select("input[name=FUZZY]");
    if (fuzzy.size() > 0) {
        CheckboxSearchField field = new CheckboxSearchField();
        field.setDisplayName(fuzzy.first().parent().parent().select(".longkey").first().text());
        field.setId("FUZZY");
        fields.add(field);
    }

    Elements mediatypes = doc.select("input[name=ADI_MAT]");
    if (mediatypes.size() > 0) {
        DropdownSearchField field = new DropdownSearchField();
        field.setDisplayName("Materialart");
        field.setId("ADI_MAT");

        field.addDropdownValue("", "Alle");
        for (Element mt : mediatypes) {
            // The label is the text of the element following the input's parent,
            // with non-breaking spaces removed.
            field.addDropdownValue(mt.attr("value"),
                    mt.parent().nextElementSibling().text().replace("\u00a0", ""));
        }
        fields.add(field);
    }

    return fields;
}

From source file:eu.sisob.uma.extractors.adhoc.cvfilesinside.InternalCVFilesExtractor.java

/**
 *
 * @param input_file/*  w ww  .ja v a 2s.c om*/
 * @param data_dir
 * @param output_file
 * @param error_sw
 */
public static void extract_cv_files(File input_file, File data_dir,
        File output_file/*, File output_file_2, File results_dir,*/, StringWriter error_sw) {
    CSVReader reader = null;
    try {
        reader = new CSVReader(new FileReader(input_file), CSV_SEPARATOR);
    } catch (FileNotFoundException ex) {
        Logger.getRootLogger().error("Error reading " + input_file.getName() + " - " + ex.toString());
    }

    int idStaffIdentifier = -1;
    int idName = -1;
    int idFirstName = -1;
    int idLastName = -1;
    int idInitials = -1;
    int idUnitOfAssessment_Description = -1;
    int idInstitutionName = -1;
    int idWebAddress = -1;
    int idResearchGroupDescription = -1;
    int idResearcherWebAddress = -1;
    int idResearcherWebAddressType = -1;
    int idResearcherWebAddressExt = -1;
    int idScoreUrl = -1;
    int idEmail = -1;
    int idScoreEmail = -1;

    String[] nextLine;
    try {
        if ((nextLine = reader.readNext()) != null) {
            //Locate indexes            
            //Locate indexes                        
            for (int i = 0; i < nextLine.length; i++) {
                String column_name = nextLine[i];
                if (column_name.equals(FileFormatConversor.CSV_COL_ID))
                    idStaffIdentifier = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_NAME))
                    idName = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_FIRSTNAME))
                    idFirstName = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_LASTNAME))
                    idLastName = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_INITIALS))
                    idInitials = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_SUBJECT))
                    idUnitOfAssessment_Description = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_INSTITUTION_NAME))
                    idInstitutionName = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_INSTITUTION_URL))
                    idWebAddress = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_RESEARCHER_PAGE_URL))
                    idResearcherWebAddress = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_RESEARCHER_PAGE_TYPE))
                    idResearcherWebAddressType = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_RESEARCHER_PAGE_EXT))
                    idResearcherWebAddressExt = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_SCORE_URL))
                    idScoreUrl = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_EMAIL))
                    idEmail = i;
                else if (column_name.equals(FileFormatConversor.CSV_COL_SCORE_EMAIL))
                    idScoreEmail = i;
            }
        }
    } catch (Exception ex) {
        String error_msg = "Error reading headers of " + input_file.getName();
        Logger.getRootLogger().error(error_msg + " - " + ex.toString());
        if (error_sw != null)
            error_sw.append(error_msg + "\r\n");

        return;
    }

    if (idResearcherWebAddress != -1 && idResearcherWebAddressType != -1 && idResearcherWebAddressExt != -1
            && idStaffIdentifier != -1 && idLastName != -1 && idInitials != -1) {
        if (true) {
            try {
                String header = "";
                header += "\"" + FileFormatConversor.CSV_COL_ID + "\"" + CSV_SEPARATOR;
                header += "\"" + FileFormatConversor.CSV_COL_LASTNAME + "\"" + CSV_SEPARATOR;
                header += "\"" + FileFormatConversor.CSV_COL_INITIALS + "\"" + CSV_SEPARATOR;
                if (idFirstName != -1)
                    header += "\"" + FileFormatConversor.CSV_COL_FIRSTNAME + "\"" + CSV_SEPARATOR;
                if (idName != -1)
                    header += "\"" + FileFormatConversor.CSV_COL_NAME + "\"" + CSV_SEPARATOR;
                if (idEmail != -1)
                    header += "\"" + FileFormatConversor.CSV_COL_EMAIL + "\"" + CSV_SEPARATOR;
                if (idInstitutionName != -1)
                    header += "\"" + FileFormatConversor.CSV_COL_INSTITUTION_NAME + "\"" + CSV_SEPARATOR;
                if (idWebAddress != -1)
                    header += "\"" + FileFormatConversor.CSV_COL_INSTITUTION_URL + "\"" + CSV_SEPARATOR;
                header += "\"" + FileFormatConversor.CSV_COL_RESEARCHER_PAGE_URL + "\"" + CSV_SEPARATOR;
                header += "\"" + FileFormatConversor.CSV_COL_RESEARCHER_PAGE_EXT + "\"" + CSV_SEPARATOR;
                header += "\"" + FileFormatConversor.CSV_COL_RESEARCHER_PAGE_TYPE + "\"" + CSV_SEPARATOR;
                header += "\"" + FileFormatConversor.CSV_COL_SCORE_URL + "\"" + CSV_SEPARATOR;
                if (idScoreEmail != -1)
                    header += "\"" + FileFormatConversor.CSV_COL_SCORE_EMAIL + "\"" + CSV_SEPARATOR;
                header += "\r\n";
                FileUtils.write(output_file, header, "UTF-8", false);
                // DOWNLOAD HERE THE HOME PAGE 
                //FileUtils.write(output_file_2, header, "UTF-8", false);

            } catch (IOException ex) {
                Logger.getLogger("root").error(ex.toString());
                error_sw.append("Error creating output files\r\n");
            }
        }

        try {
            //                DOWNLOAD HERE THE HOME PAGE 
            //                if(!results_dir.exists())
            //                    results_dir.mkdirs();                
            //                File homepage_results_dirs = new File(results_dir, "HOMEPAGE");
            //                if(!homepage_results_dirs.exists())
            //                    homepage_results_dirs.mkdirs();
            //if(!test_only_output)
            {
                Pattern p1 = Pattern.compile("([a-zA-Z0-9#._-]+)+");

                while ((nextLine = reader.readNext()) != null) {
                    nextLine[idLastName] = nextLine[idLastName].replaceAll("[^a-zA-Z]", " ").toLowerCase();
                    nextLine[idInitials] = nextLine[idInitials].replaceAll("[^a-zA-Z]", " ").toLowerCase();
                    if (idFirstName != -1)
                        nextLine[idFirstName] = nextLine[idFirstName].replaceAll("[^a-zA-Z]", " ")
                                .toLowerCase();
                    if (idName != -1)
                        nextLine[idName] = nextLine[idName].replaceAll("[^a-zA-Z]", " ").toLowerCase();

                    Document content = null;
                    String researcher_page_url = nextLine[idResearcherWebAddress];
                    File temp_file = null;
                    if (p1.matcher(researcher_page_url).matches()) {

                    } else {

                        try {

                            Logger.getRootLogger().info("Reading " + researcher_page_url);

                            temp_file = File.createTempFile("internal-cv-files-", ".tmp");
                            URL fetched_url = Downloader.fetchURL(researcher_page_url);
                            FileUtils.copyURLToFile(fetched_url, temp_file);
                            long sizeInBytes = temp_file.length();
                            long sizeInMb = sizeInBytes / (1024 * 1024);
                            if (sizeInMb > 100) {
                                content = null;
                            } else {
                                String text_content = FileUtils.readFileToString(temp_file);
                                String check_string = "";
                                if (text_content.length() <= 100) {
                                    check_string = text_content.substring(0, text_content.length());
                                } else {
                                    check_string = text_content.substring(0, 100);
                                }
                                if (check_string.toLowerCase().contains("html")) {
                                    content = Jsoup.parse(text_content);
                                    content.setBaseUri(researcher_page_url);
                                    //                                          DOWNLOAD HERE THE HOME PAGE                                        
                                    //                                        String filename = nextLine[idStaffIdentifier] + "_HOMEPAGE_" + MD5(researcher_page_url) + ".html";
                                    //                                        FileUtils.copyFile(temp_file, new File(homepage_results_dirs, filename));                                        
                                    //                                        
                                    //                                        String result = "";                        
                                    //                                        result += "\"" + nextLine[idStaffIdentifier] + "\"" + CSV_SEPARATOR;
                                    //                                        result += "\"" + nextLine[idLastName] + "\"" + CSV_SEPARATOR;
                                    //                                        result += "\"" + nextLine[idInitials] + "\"" + CSV_SEPARATOR;                                    
                                    //                                        if(idFirstName != -1) result += "\"" + nextLine[idFirstName] + "\"" + CSV_SEPARATOR;  
                                    //                                        if(idName != -1) result += "\"" + nextLine[idName] + "\"" + CSV_SEPARATOR;  
                                    //                                        if(idEmail != -1) result += "\"" + nextLine[idEmail] + "\"" + CSV_SEPARATOR; 
                                    //                                        if(idInstitutionName != -1) result += "\"" + nextLine[idInstitutionName] + "\"" + CSV_SEPARATOR;  
                                    //                                        if(idWebAddress != -1) result += "\"" + nextLine[idWebAddress] + "\"" + CSV_SEPARATOR;                 
                                    //                                        result += "\"" + filename + "\"" + CSV_SEPARATOR;
                                    //                                        result += "\"" + nextLine[idResearcherWebAddressType] + "\"" + CSV_SEPARATOR;
                                    //                                        result += "\"" + nextLine[idResearcherWebAddressExt] + "\"" + CSV_SEPARATOR;
                                    //                                        result += "\"" + (idScoreUrl != -1 ? nextLine[idScoreUrl] : "") + "\"" + CSV_SEPARATOR;
                                    //                                        if(idScoreEmail != -1) result += "\"" + nextLine[idScoreEmail] + "\"" + CSV_SEPARATOR; 
                                    //                                        result += "\r\n";
                                    //
                                    //                                        try {
                                    //                                            FileUtils.write(output_file_2, result, "UTF-8", true);
                                    //                                        } catch (IOException ex) {
                                    //                                            Logger.getLogger("root").error(ex.toString());
                                    //                                        }
                                } else {
                                    throw new Exception(researcher_page_url + " is not html document");
                                }
                            }

                        } catch (Exception ex) {
                            Logger.getLogger("root").error("" + researcher_page_url + " could not loaded", ex);
                            error_sw.append("" + researcher_page_url + " could not loaded");
                            content = null;
                        } catch (java.lang.OutOfMemoryError ex2) {
                            Logger.getLogger("root")
                                    .error("" + researcher_page_url + " could not loaded (out of memory)", ex2);
                            error_sw.append("" + researcher_page_url + " could not loaded (out of memory)");
                            content = null;
                        } finally {
                            if (temp_file != null)
                                temp_file.delete();
                        }

                    }
                    //Add sources to output
                    {
                        String result = "";
                        result += "\"" + nextLine[idStaffIdentifier] + "\"" + CSV_SEPARATOR;
                        result += "\"" + nextLine[idLastName] + "\"" + CSV_SEPARATOR;
                        result += "\"" + nextLine[idInitials] + "\"" + CSV_SEPARATOR;
                        if (idFirstName != -1)
                            result += "\"" + nextLine[idFirstName] + "\"" + CSV_SEPARATOR;
                        if (idName != -1)
                            result += "\"" + nextLine[idName] + "\"" + CSV_SEPARATOR;
                        if (idEmail != -1)
                            result += "\"" + nextLine[idEmail] + "\"" + CSV_SEPARATOR;
                        if (idInstitutionName != -1)
                            result += "\"" + nextLine[idInstitutionName] + "\"" + CSV_SEPARATOR;
                        if (idWebAddress != -1)
                            result += "\"" + nextLine[idWebAddress] + "\"" + CSV_SEPARATOR;
                        result += "\"" + nextLine[idResearcherWebAddress] + "\"" + CSV_SEPARATOR;
                        result += "\"" + nextLine[idResearcherWebAddressExt] + "\"" + CSV_SEPARATOR;
                        result += "\"HOMEPAGE\"" + CSV_SEPARATOR;
                        result += "\"" + (idScoreUrl != -1 ? nextLine[idScoreUrl] : "") + "\"" + CSV_SEPARATOR;
                        if (idScoreEmail != -1)
                            result += "\"" + nextLine[idScoreEmail] + "\"" + CSV_SEPARATOR;
                        result += "\r\n";

                        try {
                            FileUtils.write(output_file, result, "UTF-8", true);
                        } catch (IOException ex) {
                            Logger.getLogger("root").error(ex.toString());
                        }
                    }

                    if (content != null) {

                        Elements links = content.select("a[href]");
                        Elements links_worepeat = new Elements();

                        for (Element link : links) {

                            boolean b = false;
                            for (Element link_worepeat : links_worepeat) {
                                if (link.absUrl("href").equals(link_worepeat.absUrl("href"))) {
                                    b = true;
                                    break;
                                }
                            }

                            if (!b)
                                links_worepeat.add(link);

                        }

                        for (Element link : links_worepeat) {

                            boolean b = false;
                            link.setBaseUri(researcher_page_url);
                            String clean_name_1 = link.text().replaceAll("[^\\w\\s]", "").toLowerCase();
                            for (String k : cv_keywords_in_name_list) {
                                if (clean_name_1.contains(k)) {
                                    b = true;
                                    break;
                                }
                            }
                            if (b) {
                                Logger.getRootLogger()
                                        .info("CV found " + link.absUrl("href") + " (" + link.text() + ")");
                                String href = link.absUrl("href");

                                String ext = "";
                                String score = "";
                                String type = "CV";

                                if (link.absUrl("href").endsWith(".pdf"))
                                    ext = "PDF";
                                else if (link.absUrl("href").endsWith(".doc"))
                                    ext = "DOC";
                                else if (link.absUrl("href").endsWith(".docx"))
                                    ext = "DOCX";
                                else if (link.absUrl("href").endsWith(".rtf"))
                                    ext = "RTF";
                                else if (link.absUrl("href").endsWith(".txt"))
                                    ext = "TXT";
                                else
                                    ext = "HTML";

                                if (ext.equals("HTML")) {
                                    score = "B";
                                } else {
                                    score = "A";
                                }

                                String result = "";
                                result += "\"" + nextLine[idStaffIdentifier] + "\"" + CSV_SEPARATOR;
                                result += "\"" + nextLine[idLastName] + "\"" + CSV_SEPARATOR;
                                result += "\"" + nextLine[idInitials] + "\"" + CSV_SEPARATOR;
                                if (idFirstName != -1)
                                    result += "\"" + nextLine[idFirstName] + "\"" + CSV_SEPARATOR;
                                if (idName != -1)
                                    result += "\"" + nextLine[idName] + "\"" + CSV_SEPARATOR;
                                if (idEmail != -1)
                                    result += "\"" + nextLine[idEmail] + "\"" + CSV_SEPARATOR;
                                if (idInstitutionName != -1)
                                    result += "\"" + nextLine[idInstitutionName] + "\"" + CSV_SEPARATOR;
                                if (idWebAddress != -1)
                                    result += "\"" + href + "\"" + CSV_SEPARATOR;
                                result += "\"" + href + "\"" + CSV_SEPARATOR;
                                result += "\"" + ext + "\"" + CSV_SEPARATOR;
                                result += "\"" + type + "\"" + CSV_SEPARATOR;
                                result += "\"" + score + "\"" + CSV_SEPARATOR;
                                if (idScoreEmail != -1)
                                    result += "\"" + nextLine[idScoreEmail] + "\"" + CSV_SEPARATOR;
                                result += "\r\n";

                                try {
                                    FileUtils.write(output_file, result, "UTF-8", true);
                                } catch (IOException ex) {
                                    Logger.getLogger("root").error(ex.toString());
                                }

                            }

                            b = false;
                            link.setBaseUri(researcher_page_url);
                            clean_name_1 = link.text().replaceAll("[^\\w\\s]", "").toLowerCase();
                            for (String k : pub_keywords_in_name_list) {
                                if (clean_name_1.contains(k)) {
                                    b = true;
                                    break;
                                }
                            }
                            if (b) {
                                Logger.getRootLogger()
                                        .info("PUB found " + link.absUrl("href") + " (" + link.text() + ")");
                                String href = link.absUrl("href");

                                String ext = "";
                                String score = "";
                                String type = "PUB";

                                if (link.absUrl("href").endsWith(".pdf"))
                                    ext = "PDF";
                                else if (link.absUrl("href").endsWith(".doc"))
                                    ext = "DOC";
                                else if (link.absUrl("href").endsWith(".docx"))
                                    ext = "DOCX";
                                else if (link.absUrl("href").endsWith(".rtf"))
                                    ext = "RTF";
                                else if (link.absUrl("href").endsWith(".txt"))
                                    ext = "TXT";
                                else
                                    ext = "HTML";

                                if (ext.equals("HTML")) {
                                    score = "-";
                                } else {
                                    score = "-";
                                }

                                String result = "";
                                result += "\"" + nextLine[idStaffIdentifier] + "\"" + CSV_SEPARATOR;
                                result += "\"" + nextLine[idLastName] + "\"" + CSV_SEPARATOR;
                                result += "\"" + nextLine[idInitials] + "\"" + CSV_SEPARATOR;
                                if (idFirstName != -1)
                                    result += "\"" + nextLine[idFirstName] + "\"" + CSV_SEPARATOR;
                                if (idName != -1)
                                    result += "\"" + nextLine[idName] + "\"" + CSV_SEPARATOR;
                                if (idEmail != -1)
                                    result += "\"" + nextLine[idEmail] + "\"" + CSV_SEPARATOR;
                                if (idInstitutionName != -1)
                                    result += "\"" + nextLine[idInstitutionName] + "\"" + CSV_SEPARATOR;
                                if (idWebAddress != -1)
                                    result += "\"" + href + "\"" + CSV_SEPARATOR;
                                result += "\"" + href + "\"" + CSV_SEPARATOR;
                                result += "\"" + ext + "\"" + CSV_SEPARATOR;
                                result += "\"" + type + "\"" + CSV_SEPARATOR;
                                result += "\"" + score + "\"" + CSV_SEPARATOR;
                                if (idScoreEmail != -1)
                                    result += "\"" + nextLine[idScoreEmail] + "\"" + CSV_SEPARATOR;
                                result += "\r\n";

                                try {
                                    FileUtils.write(output_file, result, "UTF-8", true);
                                } catch (IOException ex) {
                                    Logger.getLogger("root").error(ex.toString());
                                }

                            }
                        }

                    }
                }

                reader.close();

            }

            //                    reader = null;
            //                    try {
            //                        reader = new CSVReader(new FileReader(output_file), CSV_SEPARATOR);
            //                    } catch (FileNotFoundException ex) {
            //                        Logger.getRootLogger().error("Error reading " + input_file.getName() + " - " + ex.toString());
            //                    }
            //
            //                    reader.readNext();
            //
            //                    int newIdResearcherWebpage = 3;
            //                    if(idFirstName != -1) newIdResearcherWebpage++; 
            //                    if(idName != -1) newIdResearcherWebpage++; 
            //                    if(idEmail != -1) newIdResearcherWebpage++; 
            //                    if(idInstitutionName != -1) newIdResearcherWebpage++; 
            //                    if(idWebAddress != -1) newIdResearcherWebpage++; 
            //
            //                    List<Object[]> urls_times = new ArrayList<Object[]>();
            //                    while ((nextLine = reader.readNext()) != null) 
            //                    {
            //                        String url = nextLine[newIdResearcherWebpage];
            //
            //                        Object[] url_time = new Object[2];
            //                        url_time[0] = url;
            //                        boolean b = false;
            //                        for(Object[] u : urls_times){
            //                            if(u[0].equals(url_time[0])){
            //                                u[1] = (Integer)u[1] + 1;         
            //                                b = true;
            //                                break;
            //                            }
            //                        }
            //
            //                        if(!b){
            //                            url_time[1] = new Integer(1);
            //                            urls_times.add(url_time);
            //                        }
            //                    }            
            //
            //                    reader.close();                    

            //                try {
            //                    reader = new CSVReader(new FileReader(output_file), CSV_SEPARATOR);
            //                } catch (FileNotFoundException ex) {
            //                    Logger.getRootLogger().error("Error reading " + input_file.getName() + " - " + ex.toString());
            //                }
            //
            //                nextLine = reader.readNext();
            //                try {
            //                    for(int i = 0; i < nextLine.length; i++)
            //                        nextLine[i] = "\"" + nextLine[i] + "\"";
            //                    FileUtils.write(output_file, StringUtil.join(Arrays.asList(nextLine), ";") + "\r\n", "UTF-8", false);
            //                } catch (IOException ex) {
            //                    Logger.getLogger("root").error(ex.toString());
            //                }
            //                
            //                while ((nextLine = reader.readNext()) != null) 
            //                {
            //                    String url = nextLine[newIdResearcherWebpage];
            //                    boolean b = false;
            //                    for(Object[] u : urls_times){
            //                        if(u[0].equals(url) && ((Integer)u[1] == 1)){                                
            //                            b = true;
            //                            break;
            //                        }
            //                    }
            //                    
            //                    if(b){
            //                        try {
            //                            for(int i = 0; i < nextLine.length; i++)
            //                                nextLine[i] = "\"" + nextLine[i] + "\"";
            //                            FileUtils.write(output_file, StringUtil.join(Arrays.asList(nextLine), ";") + "\r\n", "UTF-8", true);
            //                        } catch (IOException ex) {
            //                            Logger.getLogger("root").error(ex.toString());
            //                        }
            //                    }
            //                }
            //                
            //                 reader.close();  

        } catch (Exception ex) {
            String error_msg = "Error extracting cv files from extractor " + input_file.getName();
            Logger.getRootLogger().error(error_msg + " - " + ex.toString());
            if (error_sw != null)
                error_sw.append(error_msg + "\r\n");
            return;
        }
    }
}

From source file:de.geeksfactory.opacclient.apis.Open.java

/**
 * Builds the list of search fields offered by the catalogue's advanced search form.
 * <p>
 * The advanced search page (path read from {@code data.urls.advanced_search}) is fetched and parsed.
 * Every option of the {@code FirstSearchField} dropdown becomes a {@link TextSearchField} whose data is
 * {@code {"selectable": true}}. The table rows following the "more criteria" header are then mapped by
 * the form controls they contain:
 * <ul>
 * <li>one text input: a single text field;</li>
 * <li>two text inputs: two text fields sharing the row label, the second one marked half-width;</li>
 * <li>one {@code select}: a dropdown field with one option per {@code option} element;</li>
 * <li>one checkbox: a checkbox field.</li>
 * </ul>
 * Text fields created from these rows carry {@code {"selectable": false}} and are flagged as numeric when
 * the row text contains "nur Ziffern" (German for "digits only"). Rows without a label span get an empty
 * display name instead of aborting the whole scan.
 *
 * @return the search fields in the order they appear on the page
 * @throws IOException        if raised while fetching the page, or if the response does not contain the
 *                            expected search table
 * @throws OpacErrorException not thrown directly by this method; may be propagated from called helpers
 * @throws JSONException      if the configuration lookup or building the field data fails
 */
@Override
public List<SearchField> getSearchFields() throws IOException, OpacErrorException, JSONException {
    String url = opac_url + "/" + data.getJSONObject("urls").getString("advanced_search") + NO_MOBILE;
    Document doc = Jsoup.parse(httpGet(url, getDefaultEncoding()));

    // first() returns null when nothing matches; fail with a descriptive error instead of an NPE below.
    Element table = doc.select(".ModOPENExtendedSearchModuleC table").first();
    if (table == null) {
        throw new IOException("Advanced search form not found at " + url);
    }

    List<SearchField> fields = new ArrayList<>();

    JSONObject selectable = new JSONObject();
    selectable.put("selectable", true);

    JSONObject notSelectable = new JSONObject();
    notSelectable.put("selectable", false);

    // Selectable search criteria
    for (Element option : table.select("select[id$=FirstSearchField] option")) {
        TextSearchField field = new TextSearchField();
        field.setId(option.val());
        field.setDisplayName(option.text());
        field.setData(selectable);
        fields.add(field);
    }

    // More criteria
    Element moreHeader = table.select("span[id$=LblMoreCriterias]").parents().select("tr").first();
    if (moreHeader == null) {
        return fields;
    }

    // siblingElements() does not contain moreHeader itself, so the header's own sibling index
    // addresses the first row that follows it in this list.
    Elements siblings = moreHeader.siblingElements();
    for (int i = moreHeader.elementSiblingIndex(); i < siblings.size(); i++) {
        Element tr = siblings.get(i);
        if (tr.select("input, select").isEmpty()) {
            continue;
        }

        // Evaluate the text-input selector once per row instead of once per branch.
        Elements textInputs = tr.select("input[type=text]");
        if (textInputs.size() == 1) {
            boolean digitsOnly = tr.text().contains("nur Ziffern");
            fields.add(newCriteriaTextField(textInputs.get(0), criteriaRowLabel(tr), digitsOnly,
                    notSelectable));
        } else if (textInputs.size() == 2) {
            String label = criteriaRowLabel(tr);
            boolean digitsOnly = tr.text().contains("nur Ziffern");

            fields.add(newCriteriaTextField(textInputs.get(0), label, digitsOnly, notSelectable));

            TextSearchField second = newCriteriaTextField(textInputs.get(1), label, digitsOnly,
                    notSelectable);
            second.setHalfWidth(true);
            fields.add(second);
        } else {
            Elements selects = tr.select("select");
            if (selects.size() == 1) {
                Element select = selects.get(0);
                DropdownSearchField dropdown = new DropdownSearchField();
                dropdown.setId(select.attr("name"));
                dropdown.setDisplayName(criteriaRowLabel(tr));
                List<DropdownSearchField.Option> values = new ArrayList<>();
                for (Element option : select.select("option")) {
                    values.add(new DropdownSearchField.Option(option.val(), option.text()));
                }
                dropdown.setDropdownValues(values);
                fields.add(dropdown);
            } else {
                Elements checkboxes = tr.select("input[type=checkbox]");
                if (checkboxes.size() == 1) {
                    CheckboxSearchField field = new CheckboxSearchField();
                    field.setId(checkboxes.get(0).attr("name"));
                    field.setDisplayName(criteriaRowLabel(tr));
                    fields.add(field);
                }
            }
        }
    }
    return fields;
}

/** Returns the text of the first {@code span} whose id contains "Lbl" in the row, or "" if there is none. */
private static String criteriaRowLabel(Element row) {
    Element label = row.select("span[id*=Lbl]").first();
    return label != null ? label.text() : "";
}

/** Creates a text field named after the input's {@code name} attribute with the given label, data and numeric flag. */
private static TextSearchField newCriteriaTextField(Element input, String label, boolean digitsOnly,
        JSONObject fieldData) {
    TextSearchField field = new TextSearchField();
    field.setId(input.attr("name"));
    field.setDisplayName(label);
    field.setData(fieldData);
    if (digitsOnly) {
        field.setNumber(true);
    }
    return field;
}