List of usage examples for org.jsoup.nodes Element tagName
public String tagName()
From source file:gr.scify.newsum.ui.ViewActivity.java
@Override public void run() { // take the String from the TopicActivity Bundle extras = getIntent().getExtras(); Category = extras.getString(CATEGORY_INTENT_VAR); // Make sure we have updated the data source NewSumUiActivity.setDataSource(this); // Get user sources String sUserSources = Urls.getUserVisibleURLsAsString(ViewActivity.this); // get Topics from TopicActivity (avoid multiple server calls) TopicInfo[] tiTopics = TopicActivity.getTopics(sUserSources, Category, this); // Also get Topic Titles, to display to adapter final String[] saTopicTitles = new String[tiTopics.length]; // Also get Topic IDs final String[] saTopicIDs = new String[tiTopics.length]; // Also get Dates, in order to show in summary title final String[] saTopicDates = new String[tiTopics.length]; // DeHTML titles for (int iCnt = 0; iCnt < tiTopics.length; iCnt++) { // update Titles Array saTopicTitles[iCnt] = Html.fromHtml(tiTopics[iCnt].getTitle()).toString(); // update IDs Array saTopicIDs[iCnt] = tiTopics[iCnt].getID(); // update Date Array saTopicDates[iCnt] = tiTopics[iCnt].getPrintableDate(NewSumUiActivity.getDefaultLocale()); }/*from w ww .j a v a2 s . 
co m*/ // get the value of the TopicIDs list size (to use in swipe) saTopicIDsLength = saTopicIDs.length; final TextView title = (TextView) findViewById(R.id.title); // Fill topic spinner final ArrayAdapter<CharSequence> adapter = new ArrayAdapter<CharSequence>(this, android.R.layout.simple_spinner_item, saTopicTitles); final TextView tx = (TextView) findViewById(R.id.textView1); // final float minm = tx.getTextSize(); // final float maxm = (minm + 24); // Get active topic int iTopicNum; // If we have returned from a pause if (iPrvSelectedItem >= 0) // use previous selection before pause iTopicNum = iPrvSelectedItem; // else else // use selection from topic page iTopicNum = extras.getInt(TOPIC_ID_INTENT_VAR); final int num = iTopicNum; // create an invisible spinner just to control the summaries of the // category (i will use it later on Swipe) final Spinner spinner = (Spinner) findViewById(R.id.spinner1); adapter.setDropDownViewResource(android.R.layout.simple_spinner_dropdown_item); runOnUiThread(new Runnable() { @Override public void run() { spinner.setAdapter(adapter); // Scroll view init final ScrollView scroll = (ScrollView) findViewById(R.id.scrollView1); final String[] saTopicTitlesArg = saTopicTitles; final String[] saTopicIDsArg = saTopicIDs; final String[] SaTopicDatesArg = saTopicDates; // Add selection event spinner.setOnItemSelectedListener(new OnItemSelectedListener() { public void onItemSelected(AdapterView<?> arg0, View arg1, int arg2, long arg3) { // Changing summary loading = true; showWaitingDialog(); // Update visibility of rating bar final RatingBar rb = (RatingBar) findViewById(R.id.ratingBar); rb.setRating(0.0f); rb.setVisibility(View.VISIBLE); final TextView rateLbl = (TextView) findViewById(R.id.rateLbl); rateLbl.setVisibility(View.VISIBLE); scroll.scrollTo(0, 0); String UserSources = Urls.getUserVisibleURLsAsString(ViewActivity.this); String[] saTopicIDs = saTopicIDsArg; // track summary views per category and topic title if 
(getAnalyticsPref()) { EasyTracker.getTracker().sendEvent(VIEW_SUMMARY_ACTION, Category, saTopicTitlesArg[arg2], 0l); } if (sCustomCategory.trim().length() > 0) { if (Category.equals(sCustomCategory)) { Context ctxCur = NewSumUiActivity.getAppContext(ViewActivity.this); String sCustomCategoryURL = ctxCur.getResources() .getString(R.string.custom_category_url); // Check if specific element needs to be read String sElementID = ctxCur.getResources() .getString(R.string.custom_category_elementId); // If an element needs to be selected if (sElementID.trim().length() > 0) { try { // Check if specific element needs to be read String sViewOriginalPage = ctxCur.getResources() .getString(R.string.custom_category_visit_source); // Init text by a link to the original page sText = "<p><a href='" + sCustomCategoryURL + "'>" + sViewOriginalPage + "</a></p>"; // Get document Document doc = Jsoup.connect(sCustomCategoryURL).get(); // If a table Element eCur = doc.getElementById(sElementID); if (eCur.tagName().equalsIgnoreCase("table")) { // Get table rows Elements eRows = eCur.select("tr"); // For each row StringBuffer sTextBuf = new StringBuffer(); for (Element eCurRow : eRows) { // Append content // TODO: Use HTML if possible. Now problematic (crashes when we click on link) sTextBuf.append("<p>" + eCurRow.text() + "</p>"); } // Return as string sText = sText + sTextBuf.toString(); } else // else get text sText = eCur.text(); } catch (IOException e) { // Show unavailable text sText = ctxCur.getResources() .getString(R.string.custom_category_unavailable); e.printStackTrace(); } } else sText = Utils.getFromHttp(sCustomCategoryURL, false); } } else { // call getSummary with (sTopicID, sUserSources). Use "All" for // all Sources String[] Summary = NewSumServiceClient.getSummary(saTopicIDs[arg2], UserSources); // check if Summary exists, otherwise display message if (Summary.length == 0) { // DONE APPLICATION HANGS, DOES NOT // WORK. 
Updated: Probably OK nothingFound = true; AlertDialog.Builder al = new AlertDialog.Builder(ViewActivity.this); al.setMessage(R.string.shouldReloadSummaries); al.setNeutralButton("Ok", new DialogInterface.OnClickListener() { public void onClick(DialogInterface arg0, int arg1) { // Reset cache CacheController.clearCache(); // Restart main activity startActivity(new Intent(getApplicationContext(), NewSumUiActivity.class) .setFlags(Intent.FLAG_ACTIVITY_CLEAR_TOP)); } }); al.setCancelable(false); al.show(); // Return to home activity loading = false; return; } // Generate Summary text for normal categories sText = generateSummaryText(Summary, ViewActivity.this); pText = generatesummarypost(Summary, ViewActivity.this); } // Update HTML tx.setText(Html.fromHtml(sText)); // Allow links to be followed into browser tx.setMovementMethod(LinkMovementMethod.getInstance()); // Also Add Date to Topic Title inside Summary title.setText(saTopicTitlesArg[arg2] + " : " + SaTopicDatesArg[arg2]); // Update size updateTextSize(); // Update visited topics TopicActivity.addVisitedTopicID(saTopicIDs[arg2]); // Done loading = false; closeWaitingDialog(); } @Override public void onNothingSelected(AdapterView<?> arg0) { } }); runOnUiThread(new Runnable() { @Override public void run() { // Get active topic spinner.setSelection(num); } }); } }); runOnUiThread(new Runnable() { @Override public void run() { showHelpDialog(); } }); closeWaitingDialog(); }
From source file:cn.edu.hfut.dmic.contentextractor.ContentExtractor.java
/**
 * Recursively collects text and tag statistics for {@code node} and its descendants.
 *
 * <p>For an element, the {@code style} and {@code class} attributes are removed first, the
 * counts of all children are summed, and the resulting text density (non-link text length
 * divided by non-link tag count) is stored in {@code infoMap}. An element whose text matches
 * the pattern {@code .{1,20}>.{1,10}>.{1,20}} gets a density of -200 and is not recorded in
 * {@code infoMap}. A text node contributes its text length; any other node contributes
 * nothing.
 *
 * @param node the node to analyse
 * @return the statistics computed for {@code node}
 */
protected CountInfo computeInfo(Node node) {
    if (node instanceof TextNode) {
        int length = ((TextNode) node).text().length();
        CountInfo leaf = new CountInfo();
        leaf.textCount = length;
        leaf.leafList.add(length);
        return leaf;
    }
    if (!(node instanceof Element)) {
        return new CountInfo();
    }

    node.removeAttr("style").removeAttr("class");
    Element tag = (Element) node;
    CountInfo info = new CountInfo();

    if (tag.text().matches(".{1,20}>.{1,10}>.{1,20}")) {
        info.density = -200;
        return info;
    }

    // aggregate the statistics of every child
    for (Node child : tag.childNodes()) {
        CountInfo sub = computeInfo(child);
        info.textCount += sub.textCount;
        info.linkTextCount += sub.linkTextCount;
        info.tagCount += sub.tagCount;
        info.linkTagCount += sub.linkTagCount;
        info.leafList.addAll(sub.leafList);
        info.densitySum += sub.density;
        info.pCount += sub.pCount;
    }

    // account for this element itself
    info.tagCount++;
    String name = tag.tagName();
    if ("a".equals(name) || "img".equals(name)) {
        // all text below a link/image counts as link text
        info.linkTextCount = info.textCount;
        info.linkTagCount++;
    } else if ("p".equals(name)) {
        info.pCount++;
    }

    int plainTextLength = info.textCount - info.linkTextCount;
    int plainTagCount = info.tagCount - info.linkTagCount;
    if (plainTextLength != 0 && plainTagCount != 0) {
        info.density = plainTextLength / (double) plainTagCount;
    } else {
        info.density = 0;
    }

    infoMap.put(tag, info);
    return info;
}
From source file:tr.edu.gsu.nerwip.retrieval.reader.wikipedia.WikipediaReader.java
/** * Retrieve the text located in /*w w w. j av a 2 s. co m*/ * a description list (DL) HTML element. * * @param element * Element to be processed. * @param rawStr * Current raw text string. * @param linkedStr * Current text with hyperlinks. */ private void processDescriptionListElement(Element element, StringBuilder rawStr, StringBuilder linkedStr) { // possibly remove the last new line character char c = rawStr.charAt(rawStr.length() - 1); if (c == '\n') { rawStr.deleteCharAt(rawStr.length() - 1); linkedStr.deleteCharAt(linkedStr.length() - 1); } // possibly remove preceeding space c = rawStr.charAt(rawStr.length() - 1); if (c == ' ') { rawStr.deleteCharAt(rawStr.length() - 1); linkedStr.deleteCharAt(linkedStr.length() - 1); } // possibly add a column c = rawStr.charAt(rawStr.length() - 1); if (c != '.' && c != ':' && c != ';') { rawStr.append(":"); linkedStr.append(":"); } // process each list element Elements elements = element.children(); Iterator<Element> it = elements.iterator(); Element tempElt = null; if (it.hasNext()) tempElt = it.next(); while (tempElt != null) { // add leading space rawStr.append(" "); linkedStr.append(" "); // get term String tempName = tempElt.tagName(); if (tempName.equals(XmlNames.ELT_DT)) { // process term processTextElement(tempElt, rawStr, linkedStr); // possibly remove the last new line character c = rawStr.charAt(rawStr.length() - 1); if (c == '\n') { rawStr.deleteCharAt(rawStr.length() - 1); linkedStr.deleteCharAt(linkedStr.length() - 1); } // possibly remove preceeding space c = rawStr.charAt(rawStr.length() - 1); if (c == ' ') { rawStr.deleteCharAt(rawStr.length() - 1); linkedStr.deleteCharAt(linkedStr.length() - 1); } // possibly add a column and space c = rawStr.charAt(rawStr.length() - 1); if (c != '.' 
&& c != ':' && c != ';') { rawStr.append(": "); linkedStr.append(": "); } // go to next element if (it.hasNext()) tempElt = it.next(); else tempElt = null; } // get definition // if(tempName.equals(XmlNames.ELT_DD)) if (tempElt != null) { // process term processTextElement(tempElt, rawStr, linkedStr); // possibly remove the last new line character c = rawStr.charAt(rawStr.length() - 1); if (c == '\n') { rawStr.deleteCharAt(rawStr.length() - 1); linkedStr.deleteCharAt(linkedStr.length() - 1); } // possibly remove preceeding space c = rawStr.charAt(rawStr.length() - 1); if (c == ' ') { rawStr.deleteCharAt(rawStr.length() - 1); linkedStr.deleteCharAt(linkedStr.length() - 1); } // possibly add a semi-column c = rawStr.charAt(rawStr.length() - 1); if (c != '.' && c != ':' && c != ';') { rawStr.append(";"); linkedStr.append(";"); } // go to next element if (it.hasNext()) tempElt = it.next(); else tempElt = null; } } // possibly remove last separator c = rawStr.charAt(rawStr.length() - 1); if (c == ';') { rawStr.deleteCharAt(rawStr.length() - 1); linkedStr.deleteCharAt(linkedStr.length() - 1); c = rawStr.charAt(rawStr.length() - 1); if (c != '.') { rawStr.append("."); linkedStr.append("."); } rawStr.append("\n"); linkedStr.append("\n"); } }
From source file:com.github.irshulx.Components.InputExtensions.java
@Override public Node buildNodeFromHTML(Element element) { String text;//from ww w. j a va 2s . co m int count; TextView tv; HtmlTag tag = HtmlTag.valueOf(element.tagName().toLowerCase()); switch (tag) { case h1: case h2: case h3: RenderHeader(tag, element); break; case p: case div: text = element.html(); count = editorCore.getParentView().getChildCount(); tv = insertEditText(count, null, text); applyStyles(tv, element); break; case blockquote: text = element.html(); count = editorCore.getParentView().getChildCount(); tv = insertEditText(count, null, text); UpdateTextStyle(EditorTextStyle.BLOCKQUOTE, tv); applyStyles(tv, element); } return null; }
From source file:im.ene.lab.attiq.ui.activities.ItemDetailActivity.java
private void buildArticleMenu(@NonNull Article article) { String articleHtml = article.getRenderedBody(); Elements headers = Jsoup.parse(articleHtml).select("h0, h1, h2, h3, h4, h5, h6"); mMenuContainer.removeAllViews();// w w w . ja va2 s . c o m final LayoutInflater inflater = LayoutInflater.from(mMenuContainer.getContext()); if (!UIUtil.isEmpty(headers)) { // 1. Find the top level (lowest level) Iterator<Element> items = headers.iterator(); int topLevel = WebUtil.getHeaderLevel(items.next().tagName()); while (items.hasNext()) { int level = WebUtil.getHeaderLevel(items.next().tagName()); if (topLevel > level) { topLevel = level; } } Log.e(TAG, "buildArticleMenu: " + topLevel); // 2. Build the menu for headers for (final Element item : headers) { View menuItemView = inflater.inflate(R.layout.item_detail_menu_row, mMenuContainer, false); CheckedTextView menuContent = (CheckedTextView) menuItemView.findViewById(R.id.header_content); menuContent.setText(item.text()); int currentLevel = WebUtil.getHeaderLevel(item.tagName()); if (currentLevel - topLevel > 0) { menuContent.setCompoundDrawablesWithIntrinsicBounds( new ThreadedCommentDrawable(mCommentThreadColor, mHeaderDepthWidth, mHeaderDepthGap, currentLevel - topLevel), null, null, null); } menuItemView.setOnClickListener(new View.OnClickListener() { @Override public void onClick(View v) { mMenuAnchor = item; mContentView.clearMatches(); mContentView.findAllAsync(item.text()); } }); mMenuContainer.addView(menuItemView); } mMenuLayout.setDrawerLockMode(DrawerLayout.LOCK_MODE_UNLOCKED); mArticleHeaderMenu.setOnMenuItemClickListener(new MenuItem.OnMenuItemClickListener() { @Override public boolean onMenuItemClick(MenuItem item) { mMenuLayout.openDrawer(GravityCompat.END); return true; } }); } else { mMenuLayout.setDrawerLockMode(DrawerLayout.LOCK_MODE_LOCKED_CLOSED); mArticleHeaderMenu.setOnMenuItemClickListener(new MenuItem.OnMenuItemClickListener() { @Override public boolean onMenuItemClick(MenuItem item) { if 
(!isFinishing() && mContentContainer != null) { Snackbar.make(mContentContainer, R.string.item_detail_no_menu, Snackbar.LENGTH_LONG).show(); } return true; } }); } }
From source file:no.kantega.publishing.admin.content.htmlfilter.RemoveNestedSpanTagsFilter.java
/**
 * Creates a new element with the same tag name, base URI and attributes as {@code sourceEl}.
 * No child nodes are copied.
 *
 * @param sourceEl the element to copy the tag and attributes from
 * @return the newly created element
 */
private static Element createSafeElement(Element sourceEl) {
    org.jsoup.nodes.Attributes copiedAttrs = new org.jsoup.nodes.Attributes();
    Tag tag = Tag.valueOf(sourceEl.tagName());
    Element copy = new Element(tag, sourceEl.baseUri(), copiedAttrs);

    // the new element holds on to copiedAttrs, so filling it afterwards is visible on the copy
    for (Attribute attribute : sourceEl.attributes()) {
        copiedAttrs.put(attribute);
    }
    return copy;
}
From source file:noThreads.ParseLevel2.java
/** * * @param theLinks//w w w. j a v a 2 s . c om * @throws IOException */ public void getSecondLinks(ArrayList<String> theLinks) throws IOException { float num = 0; String temp, attrOfScr, subString; Document doc; boolean flag; for (String sLink : theLinks) { if ((sLink.endsWith(".asx") == true) || (sLink.endsWith(".swf") == true)) { stationLinks2.add(sLink); print("Written to file: %s", sLink); } else { //iframeCase(sLink); doc = parseUrl(sLink, 0); if (doc != null) { Elements media = doc.select("[src]"); print("Fetching %s --> ", sLink); flag = false; for (Element src : media) { if (src.tagName().equals("embed") == true) { flag = true; temp = src.attr("abs:src"); if (temp.endsWith(".swf") == true) { attrOfScr = src.attr("abs:flashvars"); // System.out.println("\nThis is src of embed tag: " // +temp // +"\nThis is attribute flashvars of embed tag: " // +attrOfScr); int start = attrOfScr.indexOf("http://", attrOfScr.indexOf("http://") + 1); int end = attrOfScr.indexOf("&"); char a_char = attrOfScr.charAt(end - 1); if (start != -1 && end != -1) { if (a_char == ';') { subString = attrOfScr.substring(start, end - 1); } else { subString = attrOfScr.substring(start, end); } //System.out.println("\nthis is the result subString: "+subString); stationLinks2.add(subString); } else { //something's wrong, do not process the link flag = false; } break;//link found } stationLinks2.add(temp); break;//link found, load next url } } //end nested for if (flag == false) {//the code has no embed tag stationLinks2.add(sLink); } } } num = (float) (theLinks.indexOf(sLink)) / (float) (theLinks.size()) * WEIGHT_IN_COMPUTATION + curProgress.getCurProgressPart1(); curProgress.setCurProgress((int) num); } //end outer for writeLinksToFile(links2FileName, stationLinks2); print("Written %s to file, second links.", stationLinks2.size()); }
From source file:org.aliuge.crawler.extractor.selector.action.ActionFactory.java
@SuppressWarnings("unchecked") public static SelectorAction create(Element element, String c) { if ("string".equals(c)) { StringActionType $type = EnumUtils.getEnum(StringActionType.class, element.attr("operation")); if (null == $type) { try { throw new Exception( "?" + element.tagName() + "operation"); } catch (Exception e) { e.printStackTrace();// ww w.java 2s . com } } switch ($type) { case after: return new StringAfterAction(element.attr("split")); case afterLast: return new StringAfterLastAction(element.attr("split")); case before: return new StringBeforeAction(element.attr("split")); case beforeLast: return new StringBeforeLastAction(element.attr("split")); case between: return new StringBetweenAction(element.attr("exp")); case filter: return new StringFilterAction(element.attr("filter"), element.attr("charType")); case replace: /* * return new * StringReplaceAction(element.attr("exp"),element.attr * ("replacement")); */ String exp = element.attr("exp"); String[] kv = exp.split(","); if (kv.length == 2) { return new StringReplaceAction(kv[0], kv[1]); } case split: return new StringSplitAction(element.attr("split"), element.attr("index")); case sub: return new StringSubAction(element.attr("exp")); case suffix: return new StringSuffixAction(element.attr("suffix")); case perfix: return new StringPerfixAction(element.attr("perfix")); case regex: return new StringRegexAction(element.attr("exp")); default: break; } } else if ("integer".equals(c) || "int".equals(c)) { IntegerActionType $type = EnumUtils.getEnum(IntegerActionType.class, element.attr("operation")); switch ($type) { case abs: return new IntegerAbsAction(); case between: try { return new IntegerBetweenAction(element.attr("exp"), element.attr("default")); } catch (IntegerBetweenExpressionException e) { e.printStackTrace(); } default: break; } } else if ("date".equals(c)) { } else if ("numerica".equals(c)) { IntegerActionType $type = EnumUtils.getEnum(IntegerActionType.class, element.attr("operation")); 
switch ($type) { case abs: return new IntegerAbsAction(); case between: try { return new IntegerBetweenAction(element.attr("exp"), element.attr("default")); } catch (Exception e) { e.printStackTrace(); } default: break; } } else if ("file".equals(c)) { FileActionType $type = EnumUtils.getEnum(FileActionType.class, element.attr("operation")); switch ($type) { case download: String dir = element.attr("dir"); String temp = element.attr("fileName"); boolean md5File = false, asyn; if (StringUtils.isNotBlank(temp)) { if ("{md5}".equals(temp)) { md5File = true; } } else md5File = true; temp = element.attr("asyn"); if (StringUtils.isNotBlank(temp)) { asyn = Boolean.parseBoolean(temp); } else { asyn = true; } return new DownLoadFileAction(dir, md5File, asyn); case download_resize: String dir2 = element.attr("dir"); String temp2 = element.attr("fileName"); boolean md5File2 = false, asyn2; if (StringUtils.isNotBlank(temp2)) { if ("{md5}".equals(temp2)) { md5File2 = true; } } else md5File2 = true; temp2 = element.attr("asyn"); if (StringUtils.isNotBlank(temp2)) { asyn2 = Boolean.parseBoolean(temp2); } else { asyn2 = true; } DownLoadImageResizeAction resizeAction = new DownLoadImageResizeAction(dir2, md5File2, asyn2); temp2 = element.attr("width"); if (StringUtils.isNotBlank(temp2)) { resizeAction.setW(Integer.parseInt(temp2)); } temp2 = element.attr("height"); if (StringUtils.isNotBlank(temp2)) { resizeAction.setH(Integer.parseInt(temp2)); } temp2 = element.attr("quality"); if (StringUtils.isNotBlank(temp2)) { resizeAction.setQuality(Float.parseFloat(temp2)); } temp2 = element.attr("del"); if (StringUtils.isNotBlank(temp2)) { resizeAction.setDeleteOldFile(Boolean.parseBoolean(temp2)); } return resizeAction; default: break; } } else { StringActionType $type = EnumUtils.getEnum(StringActionType.class, element.attr("operation")); if (null == $type) { try { throw new Exception( "?" 
+ element.tagName() + "operation"); } catch (Exception e) { e.printStackTrace(); } } switch ($type) { case after: return new StringAfterAction(element.attr("split")); case afterLast: return new StringAfterLastAction(element.attr("split")); case before: return new StringBeforeAction(element.attr("split")); case beforeLast: return new StringBeforeLastAction(element.attr("split")); case between: return new StringBetweenAction(element.attr("exp")); case filter: return new StringFilterAction(element.attr("filter"), element.attr("charType")); case replace: return new StringReplaceAction(element.attr("search"), element.attr("replacement")); case split: return new StringSplitAction(element.attr("split"), element.attr("index")); case sub: return new StringSubAction(element.attr("exp")); case suffix: return new StringSuffixAction(element.attr("suffix")); case perfix: return new StringPerfixAction(element.attr("perfix")); default: break; } } return null; }
From source file:org.aliuge.crawler.extractor.selector.factory.ElementCssSelectorFactory.java
/** * <b>Element??Element??select/*from www . ja v a 2s.c om*/ * @param element * @return */ @SuppressWarnings("unchecked") public static AbstractElementCssSelector create(Element element) { String name = element.attr("name"); String value = element.attr("value"); String type = element.attr("type"); String attr = element.attr("attr"); String pattern = element.attr("pattern"); String regex = element.attr("regex"); String required = element.attr("required"); String sIndex = element.attr("index"); boolean isRequired = false; if (StringUtils.isNotBlank(required)) { isRequired = Boolean.parseBoolean(required); } int index = -1; if (StringUtils.isNotBlank(sIndex)) { index = Integer.parseInt(sIndex) - 1; } AbstractElementCssSelector selector = ElementCssSelectorFactory.create(name, type, value, attr, isRequired, index, regex, pattern); // ? Elements children = element.children(); for (Element e : children) { if ("action".equals(e.tagName())) { SelectorAction action = ActionFactory.create(e, element.attr("type")); if (action != null) selector.addAction(action); } // ?Url else if ("element".equals(e.tagName())) { ((PageElementSelector) selector).addSelector(create(e)); } } return selector; }
From source file:org.aliuge.crawler.extractor.selector.IFConditions.java
/**
 * Builds an {@code IFConditions} from a configuration element: the {@code test} attribute is
 * the condition expression and every child {@code element} becomes a selector of the condition.
 *
 * @param element the configuration element, may be {@code null}
 * @return the conditions, or {@code null} when {@code element} is {@code null}
 */
public static IFConditions create(Element element) {
    if (element == null) {
        return null;
    }
    IFConditions conditions = new IFConditions(element.attr("test"));
    for (Element child : element.children()) {
        if ("element".equals(child.tagName())) {
            conditions.addSelector(ElementCssSelectorFactory.create(child));
        }
    }
    return conditions;
}