Example usage for org.jsoup.nodes Element html

List of usage examples for org.jsoup.nodes Element html

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.html().

Prototype

public String html() 

Source Link

Documentation

Retrieves the element's inner HTML.

Usage

From source file:com.github.hronom.scrape.dat.website.controllers.ScrapeButtonController.java

/**
 * Loads the page at the URL entered in the view with HtmlUnit, selects the elements matching
 * the entered selector and writes their inner HTML (separated by blank lines) to the view's
 * output.
 *
 * <p>The input controls are disabled while the method runs and are re-enabled before it
 * returns. Exceptions raised during processing are logged, not rethrown.
 */
public void processByHtmlUnit() {
    // Disable fields in view.
    scrapeView.setWebsiteUrlTextFieldEnabled(false);
    scrapeView.setSelectorTextFieldEnabled(false);
    scrapeView.setScrapeButtonEnabled(false);
    scrapeView.setWorkInProgress(true);
    scrapeView.setOutput("");

    try {
        scrapeView.setProgressBarTaskText("initializing");
        logger.info("Start processing...");
        long beginTime = System.currentTimeMillis();

        // Output input parameters.
        if (!scrapeView.getWebsiteUrl().isEmpty() && !scrapeView.getSelector().isEmpty()) {
            logger.info("Input parameters: \"" + scrapeView.getWebsiteUrl() + "\", \""
                    + scrapeView.getSelector() + "\"");
        }

        // Process.
        try {
            URL url = new URL(scrapeView.getWebsiteUrl());
            scrapeView.setProgressBarTaskText("requesting page");
            logger.info("Requesting page...");
            HtmlPage page = webClient.getPage(url);
            logger.info("Requesting of page completed.");

            scrapeView.setProgressBarTaskText("viewing page as XML");
            logger.info("View page as XML");
            String xml = page.asXml();

            // Unescape html.
            scrapeView.setProgressBarTaskText("unescaping HTML");
            logger.info("Unescape html");
            xml = StringEscapeUtils.unescapeHtml4(xml);

            logger.info("Get selector");
            String selector = scrapeView.getSelector();
            if (!xml.isEmpty() && !selector.isEmpty()) {
                scrapeView.setProgressBarTaskText("parsing HTML");
                logger.info("Parse HTML");
                Document doc = Jsoup.parse(xml);

                scrapeView.setProgressBarTaskText("selecting elements in HTML");
                logger.info("select elements in HTML");
                Elements selectedElements = doc.select(selector);

                if (!selectedElements.isEmpty()) {
                    scrapeView.setProgressBarTaskText("parsing selected elements");
                    logger.info("Parse extracted elements");
                    StringBuilder sb = new StringBuilder();
                    for (Element element : selectedElements) {
                        // Each match contributes its inner HTML followed by an empty line.
                        sb.append(element.html());
                        sb.append("\n");
                        sb.append("\n");
                    }
                    scrapeView.setOutput(sb.toString());
                }
            }
        } catch (Exception e) {
            logger.error(e);
        } finally {
            // Release the client even when something other than an Exception escapes.
            webClient.close();
        }

        long endTime = System.currentTimeMillis();
        logger.info("Process time: " + (endTime - beginTime) + " ms.");
        logger.info("Processing complete.");
    } finally {
        // Enable fields in view; done in finally so the UI can never stay locked.
        scrapeView.setWorkInProgress(false);
        scrapeView.setScrapeButtonEnabled(true);
        scrapeView.setSelectorTextFieldEnabled(true);
        scrapeView.setWebsiteUrlTextFieldEnabled(true);
    }
}

From source file:net.pixomania.crawler.W3C.parser.rules.editors.EditorsRule6.java

/**
 * Extracts the editors listed in a document.
 *
 * <p>The editor paragraph is the first {@code p} inside the first {@code .authlist} element,
 * or, failing that, the first {@code p} sibling following an {@code h4} containing "Editors".
 * The paragraph's inner HTML is cut at line breaks and every fragment is handed to
 * {@code NameParser}; link targets in a fragment become the person's e-mail (when they
 * contain {@code @}) or one of the person's websites.
 *
 * @param url not used by this rule
 * @param doc the document to inspect
 * @return the parsed editors, or {@code null} when no editor paragraph is found or no
 *         fragment could be parsed into a person
 */
@Override
public ArrayList<Person> run(String url, Document doc) {
    Element paragraph = null;
    Element authList = doc.select(".authlist").first();
    if (authList != null) {
        paragraph = authList.select("p").first();
    }
    if (paragraph == null) {
        paragraph = doc.select("h4:contains(Editors) ~ p").first();
    }
    if (paragraph == null) {
        return null;
    }

    String paragraphHtml = paragraph.html();
    String[] fragments = paragraphHtml.split("<br />");
    if (fragments.length < 2) {
        fragments = paragraphHtml.split("<br clear=\"none\" />");
    }

    ArrayList<Person> editors = new ArrayList<>();
    for (String fragment : fragments) {
        // Skip empty pieces and the organisation labels that are not people.
        if (fragment.isEmpty() || fragment.equals("WHATWG:") || fragment.equals("W3C:")) {
            continue;
        }

        Document fragmentDoc = Jsoup.parse(fragment.replace("\n", ""));
        Person person = NameParser.parse(fragmentDoc.text());
        if (person == null) {
            continue;
        }

        for (Element anchor : fragmentDoc.select("a")) {
            String href = anchor.attr("href");
            if (href.isEmpty()) {
                continue;
            }
            if (href.contains("@")) {
                person.setEmail(href.replace("mailto:", ""));
            } else {
                person.addWebsite(href);
            }
        }

        editors.add(person);
    }

    return editors.isEmpty() ? null : editors;
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Parses one javadoc HTML page and registers the resulting {@code ClassDocumentation} in
 * {@code docs}.
 *
 * <p>Entries whose file name does not start with an upper-case letter are ignored. The file
 * name is split on dots: the second-to-last part is the simple class name and everything
 * before the extension is the full name, so a name with more than two parts denotes a nested
 * class, which is additionally attached to its parent's {@code subClasses} (placeholder
 * parents are created when needed).
 *
 * <p>{@code IOException} and {@code NullPointerException} raised while parsing are logged;
 * other runtime exceptions (for example the "too many description candidates" and class-name
 * conflict errors) propagate to the caller.
 *
 * @param jdocBase    base value passed on to {@code JDocUtil.getLink} and {@code getDocBlock}
 * @param name        path of the entry, using {@code /} as separator
 * @param inputStream content of the entry; closed by this method once parsing was attempted
 * @param docs        registry that receives the parsed documentation, keyed by lower-cased
 *                    full class name
 */
static void parse(final String jdocBase, final String name, final InputStream inputStream,
        Map<String, ClassDocumentation> docs) {
    final String[] pathSplits = name.split("/");
    final String fileName = pathSplits[pathSplits.length - 1];
    if (!Character.isUpperCase(fileName.charAt(0))) {
        //ignore jdoc structure html
        return;
    }
    final String[] nameSplits = fileName.split("\\.");
    final String className = nameSplits[nameSplits.length - 2];
    // File name without the trailing ".<extension>".
    final String fullName = fileName.substring(0,
            fileName.length() - nameSplits[nameSplits.length - 1].length() - 1);
    try (BufferedReader buffer = new BufferedReader(new InputStreamReader(inputStream))) {
        //create dom Document
        final String content = buffer.lines().collect(Collectors.joining("\n"));
        Document document = Jsoup.parse(content);

        //classDocument (classname, package, description)
        Element titleElem = getSingleElementByClass(document, "title");
        final String classSig = JDocUtil.fixSpaces(titleElem.text());
        Element packageElem = titleElem.previousElementSibling();
        if (packageElem.children().size() > 1) {
            packageElem = packageElem.children().last();
        }
        final String pack = JDocUtil.fixSpaces(packageElem.text());
        final String link = JDocUtil.getLink(jdocBase, pack, fullName);
        Element descriptionElement = null;
        Elements descriptionCandidates = document.select(".description .block");
        if (descriptionCandidates.size() > 1) {
            // Keep the candidates that are not deprecation notices. Element.child(0) throws
            // for a block without child elements, so such blocks (plain-text descriptions)
            // are checked for explicitly and kept.
            List<Element> kept = descriptionCandidates.stream()
                    .filter(elem -> elem.children().isEmpty()
                            || !elem.child(0).className().startsWith("deprecat"))
                    .collect(Collectors.toList());
            if (kept.size() != 1)
                throw new RuntimeException("Found too many description candidates");
            descriptionElement = kept.get(0);
        } else if (descriptionCandidates.size() == 1) {
            descriptionElement = descriptionCandidates.get(0);
        }
        final String description = descriptionElement == null ? ""
                : JDocUtil.formatText(descriptionElement.html(), link);
        final ClassDocumentation classDoc = new ClassDocumentation(pack, fullName, classSig, description,
                classSig.startsWith("Enum"));

        //methods, fields
        final Element details = document.getElementsByClass("details").first();
        if (details != null) {
            //methods
            Element tmp = getSingleElementByQuery(details, "a[name=\"method.detail\"]");
            List<DocBlock> docBlock = getDocBlock(jdocBase, tmp, classDoc);
            if (docBlock != null) {
                for (DocBlock block : docBlock) {
                    // Overloads share one lower-cased title and are collected in a set.
                    Set<MethodDocumentation> mdocs = classDoc.methodDocs
                            .computeIfAbsent(block.title.toLowerCase(), key -> new HashSet<>());
                    mdocs.add(new MethodDocumentation(classDoc, block.signature, block.hashLink,
                            block.description, block.fields));
                }
            }
            //vars
            tmp = getSingleElementByQuery(details, "a[name=\"field.detail\"]");
            docBlock = getDocBlock(jdocBase, tmp, classDoc);
            if (docBlock != null) {
                for (DocBlock block : docBlock) {
                    classDoc.classValues.put(block.title.toLowerCase(), new ValueDocumentation(classDoc,
                            block.title, block.hashLink, block.signature, block.description));
                }
            }
            //enum-values
            tmp = getSingleElementByQuery(details, "a[name=\"enum.constant.detail\"]");
            docBlock = getDocBlock(jdocBase, tmp, classDoc);
            if (docBlock != null) {
                for (DocBlock block : docBlock) {
                    classDoc.classValues.put(block.title.toLowerCase(), new ValueDocumentation(classDoc,
                            block.title, block.hashLink, block.signature, block.description));
                }
            }
        }
        final Element methodSummary = getSingleElementByQuery(document, "a[name=\"method.summary\"]");
        classDoc.inheritedMethods.putAll(getInheritedMethods(methodSummary));

        //storing
        if (nameSplits.length > 2) {
            // Nested class: walk (and create where missing) the chain of enclosing classes.
            if (!docs.containsKey(nameSplits[0].toLowerCase()))
                docs.put(nameSplits[0].toLowerCase(), new ClassDocumentation(null, null, null, null, false));
            ClassDocumentation parent = docs.get(nameSplits[0].toLowerCase());
            for (int i = 1; i < nameSplits.length - 2; i++) {
                if (!parent.subClasses.containsKey(nameSplits[i].toLowerCase()))
                    parent.subClasses.put(nameSplits[i].toLowerCase(),
                            new ClassDocumentation(null, null, null, null, false));
                parent = parent.subClasses.get(nameSplits[i].toLowerCase());
            }
            // Adopt sub-classes collected by a placeholder registered earlier for this class.
            if (parent.subClasses.containsKey(className.toLowerCase()))
                classDoc.subClasses.putAll(parent.subClasses.get(className.toLowerCase()).subClasses);
            parent.subClasses.put(className.toLowerCase(), classDoc);
        }
        if (docs.containsKey(fullName.toLowerCase())) {
            ClassDocumentation current = docs.get(fullName.toLowerCase());
            // A non-null signature means a real class (not a placeholder) is already stored.
            if (current.classSig != null)
                throw new RuntimeException("Got a class-name conflict with classes " + classDoc.classSig + "("
                        + classDoc.className + ") AND " + current.classSig + "(" + current.className + ")");
            classDoc.subClasses.putAll(current.subClasses);
        }
        docs.put(fullName.toLowerCase(), classDoc);
    } catch (final IOException | NullPointerException ex) {
        JDocUtil.LOG.error("Got excaption for element {}", fullName, ex);
    }
    try {
        inputStream.close();
    } catch (final IOException e) {
        JDocUtil.LOG.error("Error closing inputstream", e);
    }
}

From source file:accountgen.controller.Controller.java

/**
 * Reads the {@code .adr} element inside the document's {@code address} block and stores the
 * parsed address on the given person.
 *
 * <p>The element's inner HTML is expected to hold at least two lines separated by
 * {@code <br />}: the last word of the first line is taken as the street number, the last word
 * of the second line as the state, and the first word of the second line as the postcode.
 * The street name is everything in the HTML before the street number.
 *
 * @param doc document containing the address block
 * @param p   person that receives the address
 */
private void setAddress(Document doc, Person p) {
    Elements e = doc.getElementsByClass("address");
    Element ad = e.select(".adr").first();

    // Serialise and split once instead of repeating the work for every field.
    final String html = ad.html();
    final String[] lines = html.split("<br />");
    final String[] firstLineWords = lines[0].split(" ");
    final String[] secondLineWords = lines[1].split(" ");

    String streetnumber = StringEscapeUtils.unescapeHtml4(firstLineWords[firstLineWords.length - 1]).trim();
    String state = StringEscapeUtils.unescapeHtml4(secondLineWords[secondLineWords.length - 1]).trim();

    Address address = new Address();
    address.setStreetnumber(streetnumber);
    // The extracted values are literal text, not patterns: quote them so that regex
    // metacharacters in scraped data cannot break or distort the split.
    address.setStreetname(StringEscapeUtils
            .unescapeHtml4(html.split(java.util.regex.Pattern.quote(streetnumber))[0]).trim());
    address.setState(state);
    address.setPostcode(StringEscapeUtils
            .unescapeHtml4(lines[1].split(java.util.regex.Pattern.quote(state))[0]).trim().split(" ")[0]);
    address.setCountry(Consts.COUNTRY);
    p.setAdress(address);
}

From source file:mx.itdurango.rober.siitdocentes.asynctasks.GruposTask.java

/**
 * Processes the HTML returned by the group-list request, breaking it down into a list of
 * groups that is handed to the groups list view.
 *
 * @param html HTML body of the response
 */
public void procesa(String html) {
    // Parse the HTML and take every row of the first table, which holds the data of interest.
    Document documento = Jsoup.parse(html);
    Elements filas = documento.getElementsByTag("table").get(0).getElementsByTag("tr");

    // Collects the information of every group found.
    ArrayList<Grupos> grupos = new ArrayList<Grupos>();
    for (Element fila : filas) {
        Elements celdas = fila.getElementsByTag("td");
        Grupos grupo = new Grupos();

        // Columns are numbered from 1; only the first four carry data.
        for (int i = 0; i < celdas.size(); i++) {
            Element celda = celdas.get(i);
            switch (i + 1) {
            case 1: {
                // Cell format: <b> SUBJECT_KEY  </b> <br> SUBJECT NAME
                // Drop the opening bold tag, then split into key (index 0) and name (index 1).
                String[] partes = celda.html().replace("<b>", "").split("</b> <br />");
                grupo.setClave(partes[0]);
                grupo.setNombre(partes[1]);
                break;
            }
            case 2:
                // Group identifier.
                grupo.setGrupo(celda.html());
                break;
            case 3:
                // Number of enrolled students.
                grupo.setAlumnos(celda.html());
                break;
            case 4: {
                // Link for entering partial grades, in the form:
                // <img src="http://siit.itdurango.edu.mx/img/iconos/captura_calif.gif"
                // onclick="window.location = &quot;calificaciones_parciales.php?periodo=20141&amp;materia=IT8851&amp;grupo=8TA&quot;"
                // alt="..." style="cursor:pointer">
                // Splitting on "&quot;" leaves the URL with its parameters at index 1.
                String[] separado = celda.html().split("&quot;");
                grupo.setUrl(separado[1].replace("&amp;", "&"));
                break;
            }
            default:
                break;
            }
        }

        // A row without a key is not a subject, probably the table header.
        if (grupo.getClave() != null) {
            grupos.add(grupo);
        }
    }

    // Build a fresh adapter with the collected data for the groups list view.
    context.lvGrupos.setAdapter(new GruposAdapter(context, R.layout.item_grupos, grupos));
}

From source file:me.vertretungsplan.parser.SVPlanParser.java

/**
 * Parses one day of an SVPlan substitution schedule and adds it to the schedule.
 *
 * <p>A day is recognised either by one of the date/title elements inside {@code svp} or by a
 * document title starting with "Vertretungsplan für ". Table cells are mapped to substitution
 * fields by their CSS class; an empty lesson or class cell inherits the value of the last
 * non-empty one. Additional messages (planned teachers, absences, announcements) are added to
 * the day as well.
 *
 * @param v   schedule that receives the parsed day
 * @param svp element containing the day's plan
 * @param doc document the element belongs to
 * @throws IOException if no SVPlan day can be recognised
 */
private void parseSvPlanDay(SubstitutionSchedule v, Element svp, Document doc) throws IOException {
    // The umlaut is written as a Unicode escape so the literal survives any source encoding.
    if (svp.select(".svp-plandatum-heute, .svp-plandatum-morgen, .Titel").isEmpty()
            && !doc.title().startsWith("Vertretungsplan f\u00fcr ")) {
        throw new IOException("keine SVPlan-Tabelle gefunden");
    }

    SubstitutionScheduleDay day = new SubstitutionScheduleDay();
    setDate(svp, doc, day);
    if (svp.select(".svp-tabelle, table:has(.Klasse)").size() > 0) {

        Elements rows = svp.select(".svp-tabelle tr, table:has(.Klasse) tr");
        String lastLesson = "";
        String lastClass = "";
        for (Element row : rows) {
            // Skip header rows and rows without any text.
            if ((doc.select(".svp-header").size() > 0 && row.hasClass("svp-header"))
                    || row.select("th").size() > 0 || row.text().trim().equals("")) {
                continue;
            }

            Substitution substitution = new Substitution();

            for (Element column : row.select("td")) {
                String type = column.className();
                String text = column.text();
                boolean lessonColumn = type.startsWith("svp-stunde") || type.startsWith("Stunde");
                boolean classColumn = type.startsWith("svp-klasse") || type.startsWith("Klasse");

                if (!hasData(text)) {
                    // Empty lesson/class cells repeat the value of the previous row.
                    if (lessonColumn && hasData(lastLesson)) {
                        substitution.setLesson(lastLesson);
                    } else if (classColumn && hasData(lastClass)) {
                        substitution.getClasses().addAll(Arrays
                                .asList(lastClass.split(data.optString(PARAM_CLASS_SEPARATOR, ", "))));
                    }
                    continue;
                }

                if (lessonColumn) {
                    substitution.setLesson(text);
                    lastLesson = text;
                } else if (classColumn) {
                    substitution.getClasses().addAll(Arrays
                            .asList(text.split(data.optString(PARAM_CLASS_SEPARATOR, ", "))));
                    lastClass = text;
                } else if (type.startsWith("svp-esfehlt") || type.startsWith("Lehrer")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        substitution.setPreviousTeacher(text);
                    }
                } else if (type.startsWith("svp-esvertritt") || type.startsWith("Vertretung")) {
                    if (!data.optBoolean(PARAM_EXCLUDE_TEACHERS)) {
                        // Strip a trailing " +" marker from the teacher name.
                        substitution.setTeacher(text.replaceAll(" \\+$", ""));
                    }
                } else if (type.startsWith("svp-fach") || type.startsWith("Fach")) {
                    substitution.setSubject(text);
                } else if (type.startsWith("svp-bemerkung") || type.startsWith("Anmerkung")) {
                    substitution.setDesc(text);
                    String recognizedType = recognizeType(text);
                    substitution.setType(recognizedType);
                    substitution.setColor(colorProvider.getColor(recognizedType));
                } else if (type.startsWith("svp-raum") || type.startsWith("Raum")) {
                    substitution.setRoom(text);
                }
            }

            // Fall back to the generic type when the remark column did not yield one.
            if (substitution.getType() == null) {
                substitution.setType("Vertretung");
                substitution.setColor(colorProvider.getColor("Vertretung"));
            }

            day.addSubstitution(substitution);
        }
    }
    if (svp.select(".LehrerVerplant").size() > 0) {
        day.addMessage("<b>Verplante Lehrer:</b> " + svp.select(".LehrerVerplant").text());
    }
    if (svp.select(".Abwesenheiten").size() > 0) {
        day.addMessage("<b>Abwesenheiten:</b> " + svp.select(".Abwesenheiten").text());
    }

    if (svp.select("h2:contains(Mitteilungen)").size() > 0) {
        // Announcements are the run of <p> elements directly following the heading.
        Element h2 = svp.select("h2:contains(Mitteilungen)").first();
        Element sibling = h2.nextElementSibling();
        while (sibling != null && sibling.tagName().equals("p")) {
            // A double line break separates individual messages.
            for (String nachricht : TextNode.createFromEncoded(sibling.html(), null).getWholeText()
                    .split("<br />\\s*<br />")) {
                if (hasData(nachricht))
                    day.addMessage(nachricht);
            }
            sibling = sibling.nextElementSibling();
        }
    } else if (svp.select(".Mitteilungen").size() > 0) {
        for (Element p : svp.select(".Mitteilungen")) {
            for (String nachricht : TextNode.createFromEncoded(p.html(), null).getWholeText()
                    .split("<br />\\s*<br />")) {
                if (hasData(nachricht))
                    day.addMessage(nachricht);
            }
        }
    }
    v.addDay(day);
}

From source file:eu.masconsult.bgbanking.banks.sgexpress.SGExpressClient.java

/**
 * Converts one row of an accounts table into a {@code RawBankAccount}.
 *
 * @param type section the row belongs to; only "Current Accounts" (case-insensitive) rows
 *             are converted
 * @param row  table row to convert
 * @return the account, or {@code null} when the row's class is not "bg0" (this includes
 *         "detail" rows) or the type is anything other than "Current Accounts"
 */
private RawBankAccount obtainBankAccountFromHtmlTableRow(String type, Element row) {
    // Only "bg0" rows carry account data; "detail" rows and everything else are skipped.
    String rowClass = row.attr("class");
    if (!"bg0".equalsIgnoreCase(rowClass)) {
        return null;
    }

    Log.v(TAG, "working row(" + type + "): " + row.html());

    // Cards are skipped for now and unknown types are ignored.
    if (!"Current Accounts".equalsIgnoreCase(type)) {
        return null;
    }

    // The third cell serves both as server id and as IBAN.
    String iban = row.child(2).text();
    String name = row.child(0).child(0).text();
    String currency = row.child(1).text();
    RawBankAccount account = new RawBankAccount();
    account.setServerId(iban).setName(name).setIBAN(iban).setCurrency(currency);
    return account.setBalance(Convert.strToFloat(row.child(3).text()))
            .setAvailableBalance(Convert.strToFloat(row.child(4).text()));
}

From source file:org.abondar.experimental.eventsearch.EventFinder.java

/**
 * Downloads the page of one event, fills an {@code Event} from it (description, name, place,
 * date and time) and passes the event to {@code geoCode} and {@code formJson}.
 *
 * <p>An {@code IOException} while fetching the page is logged, not rethrown.
 *
 * @param eventId identifier of the event; becomes part of the page URL
 * @param evType  key used to look up the event category in {@code eventTypes}
 */
public void getEvent(String eventId, String evType) {
    try {

        Document dc = Jsoup.connect("https://afisha.yandex.ru/msk/events/" + eventId + "/").get();

        Event eb = new Event();
        eb.setEventID(eventId);
        eb.setCategory(eventTypes.get(evType));

        // Description comes from the og:description meta tag.
        for (Element e : dc.select("meta")) {
            if (e.attributes().get("property").contains("og:description")) {
                eb.setDescription(e.attributes().get("content"));
            }
        }

        // The name is the part of the page title before the separator. The separator used
        // to be an empty string, for which indexOf always returns 0 and the name was always
        // empty.
        // NOTE(review): the original separator character appears to have been lost; an em
        // dash is assumed here - confirm against a live page title.
        final String titleSeparator = "\u2014";
        for (Element e : dc.select("title")) {
            String title = e.html();
            int cut = title.indexOf(titleSeparator);
            // Without a separator the whole title is the best available name.
            eb.setName(cut >= 0 ? title.substring(0, cut) : title);
        }

        // Any link with an attribute value pointing to a place page provides the place.
        for (Element e : dc.select("a[href]")) {
            for (Attribute attr : e.attributes().asList()) {
                if (attr.getValue().contains("/msk/places/")) {
                    eb.setPlace(getEventPlaces(attr.getValue()));
                }
            }
        }

        for (Element e : dc.select("tr[id]")) {
            for (Attribute attr : e.attributes().asList()) {

                if (attr.getValue().contains("f")) {

                    eb.setDate(e.children().first().html());

                    // The time sits four levels below the row's second cell; when that chain
                    // is broken (a null link), the third cell is tried instead.
                    try {
                        Element e1 = e.child(1).children().first();
                        Element e2 = e1.children().first();
                        Element e3 = e2.children().first();
                        Element e4 = e3.children().first();

                        eb.setTime(e4.html());

                    } catch (NullPointerException ex) {

                        Element e1 = e.child(2).children().first();
                        Element e2 = e1.children().first();
                        Element e3 = e2.children().first();
                        Element e4 = e3.children().first();
                        eb.setTime(e4.html());
                    }
                }
            }

        }

        geoCode(eb);
        formJson(eb);

    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }

}

From source file:eu.sisob.uma.NPL.Researchers.GateResearcherAnnCollector.java

/**
 * Writes an HTML view of the document in which the researcher-related annotations are
 * highlighted.
 *
 * <p>The document is serialised with the selected annotation types as inline tags. Each such
 * tag is then rewritten into a {@code <span>} whose CSS class is the annotation type,
 * surrounded by a numbered marker ({@code |1|} .. {@code |6|}); a style sheet and a legend
 * explaining the markers are inserted after {@code </head>} (or prepended when there is no
 * head), and line feeds become {@code <br>}. The result is written as UTF-8.
 *
 * <p>A failure to write the file is logged, not rethrown.
 *
 * @param doc         annotated document to render
 * @param file_result file that receives the HTML
 */
@SuppressWarnings("unchecked")
private void writeResultsInHTMLFile(Document doc, File file_result) {

    // Annotation types to highlight; the position in this array (plus one) is the number
    // used for the type in the legend and in the inline markers.
    final String[] annotationTypes = { "AccreditedUniversityStudiesPhDStudies",
            "AccreditedUniversityStudiesDegree", "AccreditedUniversityStudiesOtherPostGrade",
            "ProfessionalActivityNoCurrent", "ProfessionalActivityCurrent", "AgentIdentification" };

    AnnotationSet defaultAnnotSet = doc.getAnnotations();
    Set<String> annotTypesRequired = new HashSet<String>();
    for (String annotationType : annotationTypes) {
        annotTypesRequired.add(annotationType);
    }

    Set<Annotation> peopleAndPlaces = new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired));

    String xmlDocument = doc.toXml(peopleAndPlaces, true);

    String css_code = "<style type=\"text/css\">" + "span.AgentIdentification" + "{"
            + "   background-color: #808080;" + "} " + "span.AccreditedUniversityStudiesPhDStudies" + "{"
            + "   background-color: #FFFFCC;" + "} " + "span.AccreditedUniversityStudiesDegree" + "{"
            + "   background-color: #CCFFCC;" + "} " + "span.AccreditedUniversityStudiesOtherPostGrade" + "{"
            + "   background-color: #C17128;" + "} " + "span.ProfessionalActivityNoCurrent" + "{"
            + "   background-color: #99CCCC;" + "} " + "span.ProfessionalActivityCurrent" + "{"
            + "   background-color: #FF99CC;" + "} "
            + ".fixed {position:fixed !important; right:0px; top:0px; z-index:10 !important; background-color: #ffffff;} "
            + "</style>";

    // Legend: one "|n| = <type>" line per annotation type, shown in that type's colour.
    StringBuilder legendBuilder = new StringBuilder("<div class=\"fixed\">NOTES:<br>");
    for (int i = 0; i < annotationTypes.length; i++) {
        String type = annotationTypes[i];
        legendBuilder.append("|").append(i + 1).append("| = <span class=\"").append(type).append("\">")
                .append(type).append("</span>");
        if (i < annotationTypes.length - 1) {
            legendBuilder.append("<br>");
        }
    }
    legendBuilder.append("</div><br><br><br><br><br>");
    String legend = legendBuilder.toString();

    int index1 = xmlDocument.indexOf("</head>");
    if (index1 > 0) {
        xmlDocument = xmlDocument.replace("</head>", "</head>" + css_code + legend);
    } else {
        xmlDocument = css_code + legend + xmlDocument;
    }

    // Turn every annotation tag into a span of the matching CSS class, wrapped in its
    // numbered marker. Each type maps to its own class, so the colours agree with the legend
    // (AccreditedUniversityStudiesOtherPostGrade previously received the PhD studies class).
    for (int i = 0; i < annotationTypes.length; i++) {
        String type = annotationTypes[i];
        String marker = "<b>|" + (i + 1) + "|</b>";
        xmlDocument = xmlDocument.replace("<" + type, marker + "<span class=\"" + type + "\"");
        xmlDocument = xmlDocument.replace("</" + type + ">", "</span>" + marker);
    }

    xmlDocument = xmlDocument.replace("\n", "<br>");
    try {
        FileUtils.write(file_result, xmlDocument, "UTF-8");
    } catch (IOException ex) {
        ProjectLogger.LOGGER.error("The verbose file can not be created " + file_result.getPath(), ex);
    }
}

From source file:org.abondar.experimental.eventsearch.EventFinder.java

/**
 * Fetches a place page and extracts the place text from it.
 *
 * <p>Only paragraphs whose grandparent's HTML contains {@code <div style} are considered. For
 * such a paragraph the text is the paragraph's own HTML when it has no child elements, or the
 * HTML of its second child when that child has an {@code href} attribute; in both cases
 * {@code " ?"} is appended. The last matching paragraph wins.
 *
 * @param place path of the place page, appended to the site's base URL
 * @return the extracted text, or an empty string when nothing matched or the page could not
 *         be fetched (the {@code IOException} is logged)
 */
public String getEventPlaces(String place) {
    String placeText = "";
    try {
        Document page = Jsoup.connect("https://afisha.yandex.ru" + place).get();

        for (Element paragraph : page.select("p")) {
            if (!paragraph.parents().get(1).html().contains("<div style")) {
                continue;
            }

            Elements kids = paragraph.children();
            if (kids.isEmpty()) {
                placeText = paragraph.html() + " ?";
            } else if (kids.size() > 1 && paragraph.child(1).hasAttr("href")) {
                placeText = paragraph.child(1).html() + " ?";
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(EventFinder.class.getName()).log(Level.SEVERE, null, ex);
    }
    return placeText;
}