List of usage examples for org.jsoup.nodes Element children
public Elements children()
From source file:eu.riscoss.rdc.RDCFossology.java
/** * Parses a LicensesCfg file//from w w w .j a v a 2 s .c o m * @param target * @return HashMap: License Types, each with a Collection of Licenses * @throws IOException */ protected static HashMap<String, Collection<String>> parseLicensesFile(String target) throws IOException { HashMap<String, Collection<String>> result = new HashMap<String, Collection<String>>(); Document document; if (target.startsWith("http")) { document = Jsoup.connect(target).get(); } else { if (target.startsWith("file:")) target = target.substring(5); //File file = new File(target); InputStream in = RDCFossology.class.getResourceAsStream("res/" + target); //System.out.println("Fossology config file used: "+file.getPath()); //System.out.println("Fossology IS file used: "+in.toString()); document = Jsoup.parse(in, "UTF-8", "http://localhost"); } Elements licensesLinks = document.getElementsByAttribute("id"); for (Element element : licensesLinks) { String licenseName = element.child(0).text(); if (element.children().size() > 1) { String s = element.child(1).text(); Collection<String> licensesList = Arrays.asList(s.split("\\s*\\|\\s*")); result.put(licenseName, licensesList); } } return result; }
From source file:eu.riscoss.dataproviders.providers.FossologyDataProvider.java
/** * Parses a LicensesCfg file/*from w w w. j a v a 2s .c o m*/ * @param target * @return HashMap: License Types, each with a Collection of Licenses * @throws IOException */ protected static HashMap<String, Collection<String>> parseLicensesFile(String target) throws IOException { HashMap<String, Collection<String>> result = new HashMap<String, Collection<String>>(); Document document; if (target.startsWith("http")) { document = Jsoup.connect(target).get(); } else { File file = new File(target); System.out.println("Fossology config file used: " + file.getCanonicalPath()); document = Jsoup.parse(file, "UTF-8", "http://localhost"); } // System.out.println(document.outerHtml()); Elements licensesLinks = document.getElementsByAttribute("id"); for (Element element : licensesLinks) { String licenseName = element.child(0).text(); if (element.children().size() > 1) { String s = element.child(1).text(); Collection<String> licensesList = Arrays.asList(s.split("\\s*\\|\\s*")); //("\\s*\\|\\s*")); //xDebug System.out.println("Analysed license type: "+licenseName+": "+licensesList); result.put(licenseName, licensesList); } } return result; }
From source file:com.shareplaylearn.OauthPasswordFlow.java
/**
 * Logs in to Google by driving its HTML login form and extracts the resulting OAuth tokens.
 *
 * <p>Steps: request the OAuth authorization page, copy the pre-filled inputs of the
 * {@code gaia_loginform} form, add the credentials, POST the form to
 * {@code https://accounts.google.com/ServiceLoginAuth} with the cookies of the first response,
 * and finally read {@code access_token}, {@code id_token} and {@code expires_in} from the URL
 * of the final response. The body of the id token is decoded into an {@code OauthJwt}.
 *
 * <p>Diagnostic output is written to {@code System.out}; the password is never printed.
 *
 * @param username    value posted as the {@code Email} form field
 * @param password    value posted as the {@code Passwd} form field
 * @param clientId    OAuth client id placed in the authorization query
 * @param callbackUri redirect URI placed in the authorization query
 * @return the tokens found in the final response URL plus the decoded id-token body
 * @throws URISyntaxException     if the authorization URL cannot be built
 * @throws IOException            if the initial GET request fails
 * @throws AuthorizationException if the authorization page does not answer with 200, does not
 *                                contain the login form, or the id token is malformed
 * @throws UnauthorizedException  if the form POST does not answer with 200 or no tokens are
 *                                present in the final URL (what Google does on bad credentials)
 * @throws AssertionError         if the form POST itself cannot be executed
 */
public static LoginInfo googleLogin(String username, String password, String clientId, String callbackUri)
        throws URISyntaxException, IOException, AuthorizationException, UnauthorizedException {
    String oAuthQuery = "client_id=" + clientId + "&";
    oAuthQuery += "response_type=code&";
    oAuthQuery += "scope=openid email&";
    oAuthQuery += "redirect_uri=" + callbackUri;
    URI oAuthUrl = new URI("https", null, "accounts.google.com", 443, "/o/oauth2/auth", oAuthQuery, null);

    Connection oauthGetConnection = Jsoup.connect(oAuthUrl.toString());
    Connection.Response oauthResponse = oauthGetConnection.method(Connection.Method.GET).execute();
    if (oauthResponse.statusCode() != 200) {
        String errorMessage = "Error contacting Google's oauth endpoint: " + oauthResponse.statusCode() + " / "
                + oauthResponse.statusMessage();
        if (oauthResponse.body() != null) {
            errorMessage += oauthResponse.body();
        }
        throw new AuthorizationException(errorMessage);
    }

    Map<String, String> oauthCookies = oauthResponse.cookies();
    Document oauthPage = oauthResponse.parse();
    Element oauthForm = oauthPage.getElementById("gaia_loginform");
    if (oauthForm == null) {
        // getElementById returns null when the page layout changed; fail with a clear message
        // instead of a NullPointerException.
        throw new AuthorizationException("Google's oauth page did not contain the login form gaia_loginform");
    }
    System.out.println(oauthForm.toString());

    Connection oauthPostConnection = Jsoup.connect("https://accounts.google.com/ServiceLoginAuth");
    HashMap<String, String> formParams = new HashMap<>();
    for (Element child : oauthForm.children()) {
        System.out.println("Tag name: " + child.tagName());
        System.out.println("attrs: " + Arrays.toString(child.attributes().asList().toArray()));
        if (!child.tagName().equals("input") || !child.hasAttr("name")) {
            continue;
        }
        String keyName = child.attr("name");
        if ("Email".equals(keyName) || "Passwd".equals(keyName)) {
            // Credential fields are supplied explicitly below; skipping them here avoids
            // posting the same key twice.
            continue;
        }
        String keyValue = child.hasAttr("value") ? child.attr("value") : null;
        if (keyName != null && keyName.trim().length() != 0 && keyValue != null
                && keyValue.trim().length() != 0) {
            oauthPostConnection.data(keyName, keyValue);
            formParams.put(keyName, keyValue);
        }
    }
    oauthPostConnection.cookies(oauthCookies);

    // The credentials must be part of the POST data, not only of the logging map; otherwise
    // the login can never succeed. The password is keyed "Passwd" so the log filter below
    // actually matches it.
    oauthPostConnection.data("Email", username);
    oauthPostConnection.data("Passwd", password);
    formParams.put("Email", username);
    formParams.put("Passwd", password);

    System.out.println("form post params were: ");
    for (Map.Entry<String, String> kvp : formParams.entrySet()) {
        //DO NOT let passwords end up in the logs ;)
        if (kvp.getKey().equals("Passwd")) {
            continue;
        }
        System.out.println(kvp.getKey() + "," + kvp.getValue());
    }
    System.out.println("form cookies were: ");
    for (Map.Entry<String, String> cookie : oauthCookies.entrySet()) {
        System.out.println(cookie.getKey() + "," + cookie.getValue());
    }

    Connection.Response postResponse;
    try {
        postResponse = oauthPostConnection.method(Connection.Method.POST).timeout(5000).execute();
    } catch (IOException | RuntimeException e) {
        System.out.println("Failed to post login information to googles endpoint :/ " + e.getMessage());
        System.out.println("This usually means the connection is bad, shareplaylearn.com is down, or "
                + " google is being a punk - login manually and check.");
        // Fail like an assertion would, but keep the cause and never continue with a null response.
        throw new AssertionError("Failed to post login information to googles endpoint", e);
    }
    if (postResponse.statusCode() != 200) {
        // Report the POST response here; the GET response is known to be a 200 at this point.
        String errorMessage = "Failed to validate credentials: " + postResponse.statusCode() + " / "
                + postResponse.statusMessage();
        if (postResponse.body() != null) {
            errorMessage += postResponse.body();
        }
        throw new UnauthorizedException(errorMessage);
    }

    System.out.println("Response headers (after post to google form & following redirect):");
    for (Map.Entry<String, String> header : postResponse.headers().entrySet()) {
        System.out.println(header.getKey() + "," + header.getValue());
    }
    System.out.println("Final response url was: " + postResponse.url().toString());

    // NOTE(review): the URL is split on '&' only, so a token that is the very first parameter
    // (directly after '?' or '#') is not recognised - confirm against real redirect URLs.
    String[] args = postResponse.url().toString().split("&");
    LoginInfo loginInfo = new LoginInfo();
    for (String arg : args) {
        String[] keyAndValue = arg.split("=");
        if (keyAndValue.length < 2) {
            // Parameter without a value: nothing to extract.
            continue;
        }
        String value = keyAndValue[1].trim();
        if (arg.startsWith("access_token")) {
            loginInfo.accessToken = value;
        } else if (arg.startsWith("id_token")) {
            loginInfo.idToken = value;
        } else if (arg.startsWith("expires_in")) {
            loginInfo.expiry = value;
        }
    }

    //Google doesn't actually throw a 401 or anything - it just doesn't redirect
    //and sends you back to its login page to try again.
    //So this is what happens with an invalid password.
    if (loginInfo.accessToken == null || loginInfo.idToken == null) {
        throw new UnauthorizedException(
                "Error retrieving authorization: did you use the correct username/password?");
    }

    String[] idTokenFields = loginInfo.idToken.split("\\.");
    if (idTokenFields.length < 3) {
        throw new AuthorizationException("Error parsing id token " + loginInfo.idToken + "\n" + "it only had "
                + idTokenFields.length + " field!");
    }
    // The second dot-separated field of the id token is its Base64-encoded JSON body.
    String jwtBody = new String(Base64.decodeBase64(idTokenFields[1]), StandardCharsets.UTF_8);
    loginInfo.idTokenBody = new Gson().fromJson(jwtBody, OauthJwt.class);
    loginInfo.id = loginInfo.idTokenBody.sub;
    return loginInfo;
}
From source file:com.shareplaylearn.utilities.OauthPasswordFlow.java
public static LoginInfo googleLogin(String username, String password, String clientId, String callbackUri) throws URISyntaxException, IOException, AuthorizationException, UnauthorizedException { CloseableHttpClient httpClient = HttpClients.custom().build(); String oAuthQuery = "client_id=" + clientId + "&"; oAuthQuery += "response_type=code&"; oAuthQuery += "scope=openid email&"; oAuthQuery += "redirect_uri=" + callbackUri; URI oAuthUrl = new URI("https", null, "accounts.google.com", 443, "/o/oauth2/auth", oAuthQuery, null); Connection oauthGetCoonnection = Jsoup.connect(oAuthUrl.toString()); Connection.Response oauthResponse = oauthGetCoonnection.method(Connection.Method.GET).execute(); if (oauthResponse.statusCode() != 200) { String errorMessage = "Error contacting Google's oauth endpoint: " + oauthResponse.statusCode() + " / " + oauthResponse.statusMessage(); if (oauthResponse.body() != null) { errorMessage += oauthResponse.body(); }//from w w w .j a v a 2 s . c o m throw new AuthorizationException(errorMessage); } Map<String, String> oauthCookies = oauthResponse.cookies(); Document oauthPage = oauthResponse.parse(); Element oauthForm = oauthPage.getElementById("gaia_loginform"); System.out.println(oauthForm.toString()); Connection oauthPostConnection = Jsoup.connect("https://accounts.google.com/ServiceLoginAuth"); HashMap<String, String> formParams = new HashMap<>(); for (Element child : oauthForm.children()) { if (child.tagName().equals("input") && child.hasAttr("name")) { String keyName = child.attr("name"); String keyValue = null; if (keyName.equals("Email")) { keyValue = username; } else if (keyName.equals("Passwd")) { keyValue = password; } else if (child.hasAttr("value")) { keyValue = child.attr("value"); } if (keyValue != null) { oauthPostConnection.data(keyName, keyValue); formParams.put(keyName, keyValue); } } } oauthPostConnection.cookies(oauthCookies); //oauthPostConnection.followRedirects(false); System.out.println("form post params were: "); for 
(Map.Entry<String, String> kvp : formParams.entrySet()) { //DO NOT let passwords end up in the logs ;) if (kvp.getKey().equals("Passwd")) { continue; } System.out.println(kvp.getKey() + "," + kvp.getValue()); } System.out.println("form cookies were: "); for (Map.Entry<String, String> cookie : oauthCookies.entrySet()) { System.out.println(cookie.getKey() + "," + cookie.getValue()); } Connection.Response postResponse = null; try { postResponse = oauthPostConnection.method(Connection.Method.POST).timeout(5000).execute(); } catch (Throwable t) { System.out.println("Failed to post login information to googles endpoint :/ " + t.getMessage()); System.out.println("This usually means the connection is bad, shareplaylearn.com is down, or " + " google is being a punk - login manually and check."); assertTrue(false); } if (postResponse.statusCode() != 200) { String errorMessage = "Failed to validate credentials: " + oauthResponse.statusCode() + " / " + oauthResponse.statusMessage(); if (oauthResponse.body() != null) { errorMessage += oauthResponse.body(); } throw new UnauthorizedException(errorMessage); } System.out.println("Response headers (after post to google form & following redirect):"); for (Map.Entry<String, String> header : postResponse.headers().entrySet()) { System.out.println(header.getKey() + "," + header.getValue()); } System.out.println("Final response url was: " + postResponse.url().toString()); String[] args = postResponse.url().toString().split("&"); LoginInfo loginInfo = new LoginInfo(); for (String arg : args) { if (arg.startsWith("access_token")) { loginInfo.accessToken = arg.split("=")[1].trim(); } else if (arg.startsWith("id_token")) { loginInfo.idToken = arg.split("=")[1].trim(); } else if (arg.startsWith("expires_in")) { loginInfo.expiry = arg.split("=")[1].trim(); } } //Google doesn't actually throw a 401 or anything - it just doesn't redirect //and sends you back to it's login page to try again. //So this is what happens with an invalid password. 
if (loginInfo.accessToken == null || loginInfo.idToken == null) { //Document oauthPostResponse = postResponse.parse(); //System.out.println("*** Oauth response from google *** "); //System.out.println(oauthPostResponse.toString()); throw new UnauthorizedException( "Error retrieving authorization: did you use the correct username/password?"); } String[] idTokenFields = loginInfo.idToken.split("\\."); if (idTokenFields.length < 3) { throw new AuthorizationException("Error parsing id token " + loginInfo.idToken + "\n" + "it only had " + idTokenFields.length + " field!"); } String jwtBody = new String(Base64.decodeBase64(idTokenFields[1]), StandardCharsets.UTF_8); loginInfo.idTokenBody = new Gson().fromJson(jwtBody, OauthJwt.class); loginInfo.id = loginInfo.idTokenBody.sub; return loginInfo; }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
static void parse(final String jdocBase, final String name, final InputStream inputStream, Map<String, ClassDocumentation> docs) { final String[] pathSplits = name.split("/"); final String fileName = pathSplits[pathSplits.length - 1]; if (!Character.isUpperCase(fileName.charAt(0))) { //ignore jdoc structure html return;/*from w w w . j a va 2s. c o m*/ } final String[] nameSplits = fileName.split("\\."); final String className = nameSplits[nameSplits.length - 2]; final String fullName = fileName.substring(0, fileName.length() - nameSplits[nameSplits.length - 1].length() - 1); try (BufferedReader buffer = new BufferedReader(new InputStreamReader(inputStream))) { //create dom Document final String content = buffer.lines().collect(Collectors.joining("\n")); Document document = Jsoup.parse(content); //classDocument (classname, package, description) Element titleElem = getSingleElementByClass(document, "title"); final String classSig = JDocUtil.fixSpaces(titleElem.text()); Element packageElem = titleElem.previousElementSibling(); if (packageElem.children().size() > 1) { packageElem = packageElem.children().last(); } final String pack = JDocUtil.fixSpaces(packageElem.text()); final String link = JDocUtil.getLink(jdocBase, pack, fullName); Element descriptionElement = null; Elements descriptionCandidates = document.select(".description .block"); if (descriptionCandidates.size() > 1) { List<Element> removed = descriptionCandidates.stream().map(elem -> elem.child(0)) .filter(child -> child != null && !child.className().startsWith("deprecat")) .map(Element::parent).collect(Collectors.toList()); if (removed.size() != 1) throw new RuntimeException("Found too many description candidates"); descriptionElement = removed.get(0); } else if (descriptionCandidates.size() == 1) { descriptionElement = descriptionCandidates.get(0); } final String description = descriptionElement == null ? 
"" : JDocUtil.formatText(descriptionElement.html(), link); final ClassDocumentation classDoc = new ClassDocumentation(pack, fullName, classSig, description, classSig.startsWith("Enum")); //methods, fields final Element details = document.getElementsByClass("details").first(); if (details != null) { //methods Element tmp = getSingleElementByQuery(details, "a[name=\"method.detail\"]"); List<DocBlock> docBlock = getDocBlock(jdocBase, tmp, classDoc); if (docBlock != null) { for (DocBlock block : docBlock) { Set<MethodDocumentation> mdocs = classDoc.methodDocs .computeIfAbsent(block.title.toLowerCase(), key -> new HashSet<>()); mdocs.add(new MethodDocumentation(classDoc, block.signature, block.hashLink, block.description, block.fields)); } } //vars tmp = getSingleElementByQuery(details, "a[name=\"field.detail\"]"); docBlock = getDocBlock(jdocBase, tmp, classDoc); if (docBlock != null) { for (DocBlock block : docBlock) { classDoc.classValues.put(block.title.toLowerCase(), new ValueDocumentation(classDoc, block.title, block.hashLink, block.signature, block.description)); } } //enum-values tmp = getSingleElementByQuery(details, "a[name=\"enum.constant.detail\"]"); docBlock = getDocBlock(jdocBase, tmp, classDoc); if (docBlock != null) { for (DocBlock block : docBlock) { classDoc.classValues.put(block.title.toLowerCase(), new ValueDocumentation(classDoc, block.title, block.hashLink, block.signature, block.description)); } } } final Element methodSummary = getSingleElementByQuery(document, "a[name=\"method.summary\"]"); classDoc.inheritedMethods.putAll(getInheritedMethods(methodSummary)); //storing if (nameSplits.length > 2) { if (!docs.containsKey(nameSplits[0].toLowerCase())) docs.put(nameSplits[0].toLowerCase(), new ClassDocumentation(null, null, null, null, false)); ClassDocumentation parent = docs.get(nameSplits[0].toLowerCase()); for (int i = 1; i < nameSplits.length - 2; i++) { if (!parent.subClasses.containsKey(nameSplits[i].toLowerCase())) 
parent.subClasses.put(nameSplits[i].toLowerCase(), new ClassDocumentation(null, null, null, null, false)); parent = parent.subClasses.get(nameSplits[i].toLowerCase()); } if (parent.subClasses.containsKey(className.toLowerCase())) classDoc.subClasses.putAll(parent.subClasses.get(className.toLowerCase()).subClasses); parent.subClasses.put(className.toLowerCase(), classDoc); } if (docs.containsKey(fullName.toLowerCase())) { ClassDocumentation current = docs.get(fullName.toLowerCase()); if (current.classSig != null) throw new RuntimeException("Got a class-name conflict with classes " + classDoc.classSig + "(" + classDoc.className + ") AND " + current.classSig + "(" + current.className + ")"); classDoc.subClasses.putAll(current.subClasses); } docs.put(fullName.toLowerCase(), classDoc); } catch (final IOException | NullPointerException ex) { JDocUtil.LOG.error("Got excaption for element {}", fullName, ex); } try { inputStream.close(); } catch (final IOException e) { JDocUtil.LOG.error("Error closing inputstream", e); } }
From source file:com.screenslicer.common.CommonUtil.java
public static String getFirstChildTextByTagName(Elements elements, String tagName) { if (elements == null) { return null; }//from ww w. j a v a 2 s.com if (elements.isEmpty()) { return null; } Element element = elements.get(0); for (Element child : element.children()) { if (child.tagName().equalsIgnoreCase(tagName)) { return child.text(); } } return null; }
From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java
/**
 * Collects the methods listed as inherited, mapping each lower-cased method name to the
 * lower-cased name of the class it is listed under.
 *
 * <p>Every anchor whose name starts with {@code methods.inherited.from.class} below the parent
 * of {@code summaryAnchor} must have exactly two sibling elements: an {@code h3} heading
 * directly after it, followed by a {@code code} element whose {@code a} children are the
 * inherited methods. Anchors whose heading does not end in an {@code a} element are skipped.
 * When a method name occurs more than once, the first occurrence wins.
 *
 * @param summaryAnchor the method-summary anchor, or {@code null} if the page has none
 * @return the mapping described above; empty when {@code summaryAnchor} is {@code null}
 * @throws RuntimeException if the HTML around an anchor does not have the expected shape
 */
private static Map<String, String> getInheritedMethods(Element summaryAnchor) {
    final Map<String, String> methodToOwner = new HashMap<>();
    if (summaryAnchor == null) {
        return methodToOwner;
    }

    final Element container = summaryAnchor.parent();
    for (Element anchor : container.select("a[name^=\"methods.inherited.from.class\"]")) {
        final String unexpectedHtml = "Got unexpected html while parsing inherited methods from class "
                + anchor.attr("name");

        if (anchor.siblingElements().size() != 2) {
            throw new RuntimeException(unexpectedHtml);
        }
        final Element heading = anchor.nextElementSibling();
        if (!heading.tagName().equals("h3")) {
            throw new RuntimeException(unexpectedHtml);
        }

        // The last child of the heading is expected to be the link to the owning class.
        final Element ownerLink = heading.children().last();
        if (ownerLink == null || !ownerLink.tagName().equals("a")) {
            continue;
        }
        final String owner = ownerLink.text().toLowerCase();

        final Element methodList = heading.nextElementSibling();
        if (!methodList.tagName().equals("code")) {
            throw new RuntimeException(unexpectedHtml);
        }
        for (Element entry : methodList.children()) {
            if (entry.tagName().equals("a")) {
                methodToOwner.putIfAbsent(entry.text().toLowerCase(), owner);
            }
        }
    }
    return methodToOwner;
}
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
/**
 * Splits the current Swagger document into the list of Confluence pages to publish, according
 * to the configured pagination mode.
 *
 * <p>Configuration and document are read from {@code SWAGGER_CONFLUENCE_CONFIG} and
 * {@code SWAGGER_DOCUMENT}. Two DOM trees are used: the original document, which supplies the
 * tables of contents, and a clone with the {@code .toc} elements removed, which supplies the
 * page bodies.
 *
 * <ul>
 *   <li>Single-page mode: one ROOT page holding the whole document, with or without the table
 *       of contents depending on the configuration.</li>
 *   <li>Individual-pages mode: a ROOT page with the table of contents, one CATEGORY page per
 *       {@code .sect1} element holding that category's inner table of contents, and one
 *       INDIVIDUAL page per {@code sect2} element inside it.</li>
 *   <li>Otherwise (category mode): a ROOT page with the table of contents stripped of its
 *       {@code .sectlevel2} parts, and one CATEGORY page per {@code .sect1} element holding
 *       that element's HTML.</li>
 * </ul>
 *
 * @return the pages in publishing order, root page first
 */
private static List<ConfluencePage> handlePagination() {
    final List<ConfluencePage> confluencePages = new ArrayList<>();
    final SwaggerConfluenceConfig swaggerConfluenceConfig = SWAGGER_CONFLUENCE_CONFIG.get();

    final PaginationMode paginationMode = swaggerConfluenceConfig.getPaginationMode();

    final Document originalDocument = SWAGGER_DOCUMENT.get();
    final Document transformedDocument = originalDocument.clone();

    // Categories are taken from the clone, so their HTML never contains the table of contents.
    final Elements categoryElements = transformedDocument.select(".sect1");

    // Remove ToC from the transformed document
    final Elements toc = transformedDocument.select(".toc");
    toc.html("");
    toc.unwrap();

    // For Single Page Mode, the incoming XHTML can be used directly.
    if (paginationMode == SINGLE_PAGE) {
        final ConfluencePage confluencePage = ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle())
                .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null))
                .build();

        // The original document still has its ToC; the transformed clone does not.
        if (swaggerConfluenceConfig.isIncludeTableOfContentsOnSinglePage()) {
            confluencePage.setXhtml(originalDocument.html());
        } else {
            confluencePage.setXhtml(transformedDocument.html());
        }

        confluencePages.add(confluencePage);

        return confluencePages;
    }

    // Before beginning further processing, we need to know if we're in individual
    // page mode or not, as that will affect how we split the DOM. If we're in this
    // mode then the category pages will contain inner table of contents.
    final boolean individualPages = (paginationMode == INDIVIDUAL_PAGES);

    // From here on, if we're still proceeding then we know the meat of the document
    // will go in sub-pages. So for the master page, we will use the table of contents
    final Elements tocElements = originalDocument.select(".toc");

    final List<String> innerTocXHtmlList = new ArrayList<>();
    final Elements innerTocElements = originalDocument.select(".sectlevel2");

    for (final Element innerTocElement : innerTocElements) {
        // If we're in individual page mode, then we collect the inner ToCs
        if (individualPages) {
            // Wrap the inner list in the same markup the top-level ToC uses.
            final StringBuilder tocHtml = new StringBuilder();
            tocHtml.append("<div id=\"toc\" class=\"toc\">");
            tocHtml.append("<h4 id=\"toctitle\">Table of Contents</h4>");
            tocHtml.append("<div><ul class=\"sectlevel1\">");
            tocHtml.append(innerTocElement.html());
            tocHtml.append("</ul></div></div>");
            innerTocXHtmlList.add(tocHtml.toString());
        }
        // If we're in category page mode, then we strip out the inner table of contents.
        // This edits originalDocument in place, so it must happen before tocElements.html()
        // is read for the root page below.
        // NOTE(review): if SWAGGER_DOCUMENT.get() hands out a shared instance, this mutation
        // is visible to later readers of it - confirm.
        else {
            innerTocElement.html("");
            innerTocElement.unwrap();
        }
    }

    // Build the Root Page w/ the Appropriate Level of Table of Contents
    final ConfluencePage rootConfluencePage = ConfluencePageBuilder.aConfluencePage()
            .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle())
            .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null))
            .withXhtml(tocElements.html()).build();
    confluencePages.add(rootConfluencePage);

    // 1-based running number of the category, passed to buildConfluenceTitle.
    int category = 1;

    // Now we process the category pages
    for (final Element categoryElement : categoryElements) {
        // Fetch the title from the first child, which is the header element
        final String categoryTitle = categoryElement.children().first().text();

        // If we're in individual mode then we need these to be sub table of contents
        if (individualPages) {
            // NOTE(review): get(category - 1) assumes there is one .sectlevel2 element per
            // .sect1 element, in the same order - confirm against the generated markup.
            final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage()
                    .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle)
                    .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null))
                    .withXhtml(innerTocXHtmlList.get(category - 1)).build();
            confluencePages.add(categoryConfluencePage);

            final Elements individualElements = categoryElement.getElementsByClass("sect2");

            // 1-based running number of the individual page within its category.
            int individual = 1;

            for (final Element individualElement : individualElements) {
                final String individualTitle = individualElement.children().first().text();
                final ConfluencePage individualConfluencePage = ConfluencePageBuilder.aConfluencePage()
                        .withPageType(INDIVIDUAL).withOriginalTitle(individualTitle)
                        .withConfluenceTitle(buildConfluenceTitle(individualTitle, category, individual))
                        .withXhtml(individualElement.html()).build();
                confluencePages.add(individualConfluencePage);

                individual++;
            }

            category++;
            continue;
        }

        // If we're in category mode, we use the remaining page data
        final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle)
                .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null))
                .withXhtml(categoryElement.html()).build();
        confluencePages.add(categoryConfluencePage);

        category++;
    }

    return confluencePages;
}
From source file:io.github.carlomicieli.footballdb.starter.parsers.SeasonGamesParser.java
/**
 * Finds the first direct child of {@code table} that is a {@code tbody} element.
 *
 * @param table the element whose direct children are searched
 * @return the first {@code tbody} child, or an empty {@code Optional} if there is none
 */
private Optional<Element> extractTableBody(Element table) {
    for (Element child : table.children()) {
        if (child.tagName().equals("tbody")) {
            return Optional.of(child);
        }
    }
    return Optional.empty();
}
From source file:io.github.carlomicieli.footballdb.starter.parsers.DraftParser.java
/**
 * Finds the first direct child of {@code tab} that is a {@code tbody} element.
 *
 * @param tab the element whose direct children are searched
 * @return the first {@code tbody} child, or an empty {@code Optional} if there is none
 */
private Optional<Element> extractTableBody(Element tab) {
    Optional<Element> body = Optional.empty();
    for (Element candidate : tab.children()) {
        if (candidate.tagName().equals("tbody")) {
            body = Optional.of(candidate);
            break;
        }
    }
    return body;
}