Example usage for org.jsoup.nodes Element attr

List of usage examples for org.jsoup.nodes Element attr

Introduction

On this page you can find example usages of org.jsoup.nodes.Element.attr.

Prototype

public String attr(String attributeKey) 

Source Link

Documentation

Get an attribute's value by its key.

Usage

From source file:io.seldon.importer.articles.AttributesImporterUtils.java

/**
 * Splits the comma-separated {@code content} attribute of an element into tag strings.
 *
 * <p>Each piece is trimmed and lower-cased. Lower-casing uses {@code Locale.ROOT} so the
 * result does not depend on the JVM's default locale. Empty pieces between two commas are
 * kept, exactly as {@link String#split(String)} produces them.
 *
 * @param tagsElement element whose {@code content} attribute holds the tags
 * @return a new, mutable list with one entry per comma-separated piece
 */
public static List<String> getTagsPartsFromSingleElement(Element tagsElement) {
    // split() never returns null, so no null fallback is needed here.
    String[] parts = tagsElement.attr("content").split(",");

    List<String> tagsParts = new ArrayList<String>(parts.length);
    for (String part : parts) {
        tagsParts.add(part.trim().toLowerCase(java.util.Locale.ROOT));
    }
    return tagsParts;
}

From source file:FILER.java

/**
 * Parses an HTML file and collects its title, heading text and image alt text.
 *
 * <p>Side effect: the shared {@code Text} field is reset and then filled with the
 * document's text followed by every non-empty {@code alt} value.
 *
 * @param f HTML file, parsed as UTF-8
 * @return a four-element array: [0] the document title, [1] the text of all
 *         {@code h1}..{@code h6} elements (each followed by a space), [2] every non-empty
 *         image {@code alt} value (each preceded by a space), [3] an empty string (this
 *         method never fills the fourth slot)
 * @throws IOException if the file cannot be read
 */
public static String[] Dealing_Files(File f) throws IOException {
    Text = "";
    String[] Importants = { "", "", "", "" };
    org.jsoup.nodes.Document doc = Jsoup.parse(f, "UTF-8");
    Importants[0] = doc.title();

    // HTML defines h1..h6 only. Every level is visited instead of stopping at the first
    // missing one, so a page without <h1> still contributes its <h2>..<h6> text.
    StringBuilder allHeaders = new StringBuilder();
    for (int level = 1; level <= 6; level++) {
        for (Element header : doc.getElementsByTag("h" + level)) {
            allHeaders.append(header.text()).append(' ');
        }
    }
    Importants[1] = allHeaders.toString();

    Text = Text + " " + doc.text();

    // attr() yields an empty string for a missing attribute, so an emptiness check suffices.
    StringBuilder altText = new StringBuilder();
    for (Element image : doc.getElementsByTag("img")) {
        String alt = image.attr("alt");
        if (!alt.isEmpty()) {
            altText.append(' ').append(alt);
        }
    }
    Text = Text + altText.toString();
    Importants[2] = Importants[2] + altText.toString();

    return Importants;
}

From source file:Main.java

/**
 * Logs in to a provider's page and returns the document served after the login POST.
 *
 * <p>The login page is fetched first to read its first hidden input and the
 * {@code PHPSESSID} cookie. Both are then sent back together with the credentials.
 *
 * @param strProvider value substituted into the {@code POST_URL} format string
 * @param strUsername value sent as {@code login[username]}
 * @param strPassword value sent as {@code login[password]}
 * @return the parsed response of the login request (redirects are followed)
 * @throws IOException if a request fails, or if the login page has no hidden input field
 */
public static Document getPage(String strProvider, String strUsername, String strPassword) throws IOException {

    final String strLoginUrl = String.format(POST_URL, strProvider);

    Connection.Response resResponse = Jsoup.connect(strLoginUrl).execute();

    // first() returns null when nothing matched; fail with a clear message instead of an NPE.
    Element eleHidden = resResponse.parse().select("input[type=hidden]").first();
    if (eleHidden == null) {
        throw new IOException("No hidden input field found on login page: " + strLoginUrl);
    }

    Document docBody = Jsoup.connect(strLoginUrl)
            .data(eleHidden.attr("name"), eleHidden.attr("value"), "login[username]", strUsername,
                    "page_referrer", "login", "login[password]", strPassword)
            .method(Method.POST).followRedirects(true).cookie("PHPSESSID", resResponse.cookie("PHPSESSID"))
            .execute().parse();

    return docBody;

}

From source file:edu.ucla.cs.scai.swim.qa.ontology.dbpedia.tipicality.DbpediaCsvDownload.java

/**
 * Walks the children of an element and downloads every linked CSV file.
 *
 * <p>Inside each {@code small} child, every {@code a} element whose text is "csv"
 * (case-insensitive) is downloaded into {@code DBpediaOntology.DBPEDIA_CSV_FOLDER}, named
 * after the last path segment of its {@code href}. Inside each {@code ul} child, the method
 * recurses into every {@code li}.
 *
 * <p>A failed download is printed and skipped so the remaining links are still processed.
 *
 * @param e element whose children are scanned
 * @throws MalformedURLException declared for callers; download failures are caught here
 * @throws IOException declared for callers; download failures are caught here
 */
private static void download(Element e) throws MalformedURLException, IOException {
    for (Element c : e.children()) {
        String tagName = c.tag().getName();
        if (tagName.equals("small")) {
            for (Element c1 : c.children()) {
                if (c1.tag().getName().equals("a") && c1.text().equalsIgnoreCase("csv")) {
                    String href = c1.attr("href");
                    System.out.println("Downloading " + href);
                    try {
                        URL remoteFile = new URL(href);
                        String[] s = href.split("\\/");
                        String target = DBpediaOntology.DBPEDIA_CSV_FOLDER + s[s.length - 1];
                        // try-with-resources closes both the remote channel and the output
                        // file, even when the transfer fails half-way.
                        try (ReadableByteChannel rbc = Channels.newChannel(remoteFile.openStream());
                                FileOutputStream fos = new FileOutputStream(target)) {
                            fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
                        }
                    } catch (Exception ex) {
                        // Best effort: report and continue with the next link.
                        ex.printStackTrace();
                    }
                }
            }
        } else if (tagName.equals("ul")) {
            for (Element c1 : c.children()) {
                if (c1.tagName().equals("li")) {
                    download(c1);
                }
            }
        }
    }
}

From source file:com.vaadin.sass.testcases.scss.W3ConformanceTests.java

/**
 * Fetches an index page and collects the links whose {@code href} matches a regular expression.
 *
 * <p>Relative links are resolved against {@code url}. Links are gathered in document order
 * until {@code maxTests} have been collected; links contained in {@code excludeUrls} are skipped.
 *
 * @param url address of the index page (10 second timeout)
 * @param regexp expression placed into the {@code a[href~=...]} selector
 * @param maxTests upper bound on the number of returned URIs
 * @param excludeUrls URIs that must not be returned
 * @return the collected URIs, in encounter order and without duplicates
 * @throws Exception if the page cannot be fetched or a URI cannot be parsed
 */
protected static Collection<URI> scrapeIndexForTests(String url, String regexp, int maxTests,
        Collection<URI> excludeUrls) throws Exception {

    final URI indexUri = new URI(url);
    final Document indexPage = Jsoup.connect(url).timeout(10000).get();
    final Elements anchors = indexPage.select(String.format("a[href~=%s]", regexp));

    final LinkedHashSet<URI> collected = new LinkedHashSet<URI>();
    for (Element anchor : anchors) {
        URI candidate = new URI(anchor.attr("href"));
        if (!candidate.isAbsolute()) {
            candidate = indexUri.resolve(candidate);
        }
        if (collected.size() >= maxTests) {
            break;
        }
        if (!excludeUrls.contains(candidate)) {
            collected.add(candidate);
        }
    }

    return collected;
}

From source file:com.nuance.expertassistant.ContentCrawler.java

/**
 * Fetches a page, extracts the content of every new matching link and returns those links.
 *
 * <p>A link qualifies when its absolute {@code href} is not yet in {@code visitedURLs} and
 * contains {@code URLCrawlPattern}. Each qualifying link is recorded in {@code visitedURLs}
 * and passed to {@code ContentExtractor.extract}. Once {@code visitedURLs} grows beyond
 * {@code PageLimit}, the document is ended and {@code exit(0)} is called.
 *
 * <p>Any exception is printed, and the links gathered so far are returned.
 *
 * @param StartUrl page to fetch (no connection timeout)
 * @param depth current crawl depth; only used for the progress message
 * @return the links newly discovered on this page
 */
public static ArrayList<String> listURLs(String StartUrl, int depth) {

    System.out.println(" Current Depth is : [" + depth + "]");

    final ArrayList<String> discovered = new ArrayList<String>();

    try {
        final Elements anchors = Jsoup.connect(StartUrl).timeout(0).get().select("a");

        for (final Element anchor : anchors) {
            final String target = anchor.attr("abs:href");

            // Skip links already seen or outside the crawl pattern.
            if (visitedURLs.contains(target) || !target.contains(URLCrawlPattern)) {
                continue;
            }

            visitedURLs.add(target);
            if (visitedURLs.size() > PageLimit) {
                ContentExtractor.endDocument();
                System.out.println(" Max URL Limit Reached - [Stopping ....] ");
                System.out.println(" [Stopped] ");

                exit(0);
            }

            discovered.add(target);
            System.out.println(" URLs Extracted So Far : [" + visitedURLs.size() + "]");
            System.out.println(" Extracting Content From : [" + target + "]");
            ContentExtractor.extract(target);
        }

    } catch (final Exception e) {
        e.printStackTrace();
    }

    return discovered;

}

From source file:io.seldon.importer.articles.AttributesImporterUtils.java

/**
 * Collects the tags of an article from the elements matched by a CSS selector.
 *
 * <p>If the first matched element has a non-blank {@code content} attribute, the tags are
 * read from that attribute. Otherwise they are read from all matched elements. Extra tags
 * derived from the title are added to the result.
 *
 * @param articleDoc parsed article
 * @param tagsCssSelector selector locating the tag element(s); blank yields an empty set
 * @param title article title handed to {@code createExtraTagsPartsFromTitle}
 * @return the set of tags; empty when no selector is given
 */
public static Set<String> getTags(Document articleDoc, String tagsCssSelector, String title) {
    Set<String> tagSet = new HashSet<String>();

    if (!StringUtils.isNotBlank(tagsCssSelector)) {
        return tagSet;
    }

    Elements matched = articleDoc.select(tagsCssSelector);
    Element firstMatch = matched.first();

    // isNotBlank also rejects null, so no separate null test on the attribute is needed.
    boolean hasContentAttribute = firstMatch != null && StringUtils.isNotBlank(firstMatch.attr("content"));

    List<String> tagsParts = hasContentAttribute
            ? AttributesImporterUtils.getTagsPartsFromSingleElement(firstMatch)
            : AttributesImporterUtils.getTagsPartsFromMultipleElement(matched);

    tagSet.addAll(tagsParts);
    tagSet.addAll(AttributesImporterUtils.createExtraTagsPartsFromTitle(title, tagsParts));

    return tagSet;
}

From source file:org.keycloak.testsuite.util.saml.LoginBuilder.java

/**
 * Builds the request that submits the first form of a login page with the user's credentials.
 *
 * <p>Every {@code input} of the form becomes a parameter. The inputs with id
 * {@code username} and {@code password} receive the user's credentials; all others keep
 * their current value. A form whose method is "post" (case-insensitive) yields an
 * {@code HttpPost} with a UTF-8 form entity; any other form yields an {@code HttpGet} with
 * the parameters in the query string.
 *
 * @param user user whose username and password are submitted
 * @param loginPage HTML of the login page
 * @return the request targeting the form's {@code action}
 * @throws IllegalArgumentException if the page contains no form
 */
public static HttpUriRequest handleLoginPage(UserRepresentation user, String loginPage) {
    final String username = user.getUsername();
    final String password = getPasswordOf(user);
    final org.jsoup.nodes.Document parsedPage = Jsoup.parse(loginPage);

    final List<NameValuePair> parameters = new LinkedList<>();
    for (Element form : parsedPage.getElementsByTag("form")) {
        final String action = form.attr("action");

        for (Element input : form.getElementsByTag("input")) {
            final String inputId = input.id();
            final String value;
            if ("username".equals(inputId)) {
                value = username;
            } else if ("password".equals(inputId)) {
                value = password;
            } else {
                value = input.val();
            }
            parameters.add(new BasicNameValuePair(input.attr("name"), value));
        }

        if (!"post".equalsIgnoreCase(form.attr("method"))) {
            UriBuilder uri = UriBuilder.fromPath(action);
            for (NameValuePair parameter : parameters) {
                uri.queryParam(parameter.getName(), parameter.getValue());
            }
            return new HttpGet(uri.build());
        }

        HttpPost post = new HttpPost(action);
        try {
            post.setEntity(new UrlEncodedFormEntity(parameters, "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        return post;
    }

    throw new IllegalArgumentException("Invalid login form: " + loginPage);
}

From source file:com.slidespeech.server.service.TextToSpeechService.java

/**
 * Writes the speaker notes to a file and reports which voice files they need.
 *
 * <p>Every {@code voice} element without a {@code name} gets one assigned from its
 * {@code xml:lang} and {@code gender} attributes: "alex" for German, "william" for male
 * English, "isabella" otherwise. The body of the resulting document is written to
 * {@code fileName}, and the known voice names are replaced by their voice file paths.
 *
 * <p>If anything fails, the raw notes are written to the file instead and the marker
 * "ssml parsing failed" is appended to the result.
 *
 * @param fileName file to write to
 * @param speakernotes speaker notes markup
 * @return the collected voices joined by commas
 * @throws IOException if the fallback write fails
 */
private static String createXML4Cereproc(String fileName, String speakernotes) throws IOException {
    List<String> voices = new ArrayList<String>();

    try {
        Document doc = Jsoup.parse(speakernotes, "");
        doc.outputSettings().prettyPrint(false);
        Elements voiceNodes = doc.select("voice");

        for (Element voiceNode : voiceNodes) {
            String lang = (voiceNode.hasAttr("xml:lang") && !voiceNode.attr("xml:lang").equals(""))
                    ? voiceNode.attr("xml:lang")
                    : "en";
            String gender = (voiceNode.hasAttr("gender") && !voiceNode.attr("gender").equals(""))
                    ? voiceNode.attr("gender")
                    : "female";
            String voiceName = (voiceNode.hasAttr("name") && !voiceNode.attr("name").equals(""))
                    ? voiceNode.attr("name")
                    : "";

            // Voice name not set by the user: choose one from language and gender.
            if (voiceName.equals("")) {
                voiceName = "isabella"; // default
                if (lang.equalsIgnoreCase("en") && gender.equalsIgnoreCase("male")) {
                    voiceName = "william";
                }
                if (lang.equalsIgnoreCase("de")) {
                    voiceName = "alex";
                }

                voiceNode.attr("name", voiceName);
            }
            if (!voices.contains(voiceName)) {
                voices.add(voiceName);
            }
        }

        // try-with-resources closes the writer even when writing fails, so the fallback
        // below never reopens a file that is still held open.
        try (BufferedWriter out = new BufferedWriter(new FileWriter(fileName))) {
            out.write(doc.select("body").first().html());
        }

        for (int i = 0; i < voices.size(); i++) {
            if (voices.get(i).equals("william"))
                voices.set(i, "/opt/cereproc/cerevoice_william_3.0.5_22k.voice");
            if (voices.get(i).equals("isabella"))
                voices.set(i, "/opt/cereproc/cerevoice_isabella_3.0.3_22k.voice");
            if (voices.get(i).equals("alex"))
                voices.set(i, "/opt/cereproc/cerevoice_alex_3.0.0_beta_22k.voice");
        }
    } catch (Exception e) {
        // Fallback if SSML parsing or writing fails: store the notes unchanged.
        try (Writer out = new OutputStreamWriter(new FileOutputStream(fileName))) {
            out.write(speakernotes);
        }
        voices.add("ssml parsing failed");
    }

    return StringUtils.join(voices, ",");
}

From source file:com.ignorelist.kassandra.steam.scraper.HtmlTagLoader.java

/**
 * Returns the {@code src} of the first element as a URI, after applying {@code URL_REPLACE}.
 *
 * <p>Every key of {@code URL_REPLACE} found in the {@code src} string is replaced by its
 * value before the URI is parsed.
 *
 * @param elements elements to read from; only the first one is used
 * @return the rewritten URI, or {@code null} if there is no element, the element has no
 *         {@code src}, or the resulting string is not a valid URI
 */
private static URI getSrcUri(Elements elements) {
    final Element first = Iterables.getFirst(elements, null);
    if (null == first) {
        return null;
    }
    String srcString = first.attr("src");
    // attr() returns an empty string for a missing attribute, so the empty case must be
    // tested too; otherwise a missing src would produce an empty URI instead of null.
    if (null == srcString || srcString.isEmpty()) {
        return null;
    }
    try {
        for (Map.Entry<String, String> replacement : URL_REPLACE.entrySet()) {
            srcString = srcString.replace(replacement.getKey(), replacement.getValue());
        }
        return new URI(srcString);
    } catch (URISyntaxException ex) {
        Logger.getLogger(HtmlTagLoader.class.getName()).log(Level.SEVERE, null, ex);
        return null;
    }
}