List of usage examples for the method org.apache.commons.lang3.StringEscapeUtils.unescapeHtml4
public static final String unescapeHtml4(final String input)
Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.
From source file:com.chiorichan.util.WebFunc.java
/**
 * Converts HTML 4 entity escapes in the given text back into the characters they stand for.
 *
 * @param l the text that may contain HTML entity escapes
 * @return whatever {@code StringEscapeUtils.unescapeHtml4} returns for {@code l}
 */
public static String unescapeHTML(String l) {
    final String unescaped = StringEscapeUtils.unescapeHtml4(l);
    return unescaped;
}
From source file:elh.eus.absa.CorpusReader.java
/**
 * Registers a sentence under the given id.
 *
 * <p>The text is first stripped of Java escapes and then of HTML 4 entity escapes
 * before it is stored in {@code sentences}.
 *
 * @param id   the key under which the sentence is stored
 * @param text the raw sentence text, possibly containing Java and HTML escapes
 */
public void addSentence(String id, String text) {
    final String withoutJavaEscapes = StringEscapeUtils.unescapeJava(text);
    final String plainText = StringEscapeUtils.unescapeHtml4(withoutJavaEscapes);
    this.sentences.put(id, plainText);
}
From source file:com.romeikat.datamessie.core.rss.task.maintenance.MaintenanceTask.java
private void unescapeHtmlCharsFromDocument(final TaskExecution taskExecution) throws TaskCancelledException { final HibernateSessionProvider sessionProvider = new HibernateSessionProvider(sessionFactory); // Get all IDs final List<Long> ids = documentDao.getIds(sessionProvider.getStatelessSession()); sessionProvider.closeStatelessSession(); // Process IDs in batches final List<List<Long>> batches = CollectionUtil.splitIntoSubListsBySize(ids, batchSize); for (final List<Long> batch : batches) { new ParallelProcessing<Long>(sessionFactory, batch) { @Override// www . j a v a 2 s. c o m public void doProcessing(final HibernateSessionProvider sessionProvider, final Long documentId) { // Unescape characters final Document document = documentDao.getEntity(sessionProvider.getStatelessSession(), documentId); if (document == null) { return; } final String title = document.getTitle(); final String unescapedTitle = StringEscapeUtils.unescapeHtml4(title); document.setTitle(unescapedTitle); final String description = document.getDescription(); final String unescapedDescription = StringEscapeUtils.unescapeHtml4(description); document.setDescription(unescapedDescription); final boolean titleChanged = title != null && unescapedTitle != null && !title.equals(unescapedTitle); final boolean descriptionChanged = description != null && unescapedDescription != null && !description.equals(unescapedDescription); if (titleChanged || descriptionChanged) { documentDao.update(sessionProvider.getStatelessSession(), document); LOG.info("Unescaped HTML characters from document {}", documentId); } } }; taskExecution.checkpoint(); } }
From source file:com.glowinteractive.reforger.Item.java
@Override public boolean visit(TagNode parent, HtmlNode current) { Pattern p = null;/*from w w w . j a va 2s .co m*/ Matcher m = null; String text = null; Stat key = null; int value = -1; boolean PARSE_MUTABLE = false; int GROUP_INDEX_KEY = -1, GROUP_INDEX_VAL = -1; if (current instanceof CommentNode) { final CommentNode c = (CommentNode) current; final String content = c.getCommentedContent(); if (content.matches("<!--rtg\\d\\d-->")) { //<editor-fold defaultstate="collapsed" desc="Secondary Stats for Non-Random Itemization Pieces."> // e.g.: "Equip: Increases your critical strike rating by 168" GROUP_INDEX_KEY = 1; GROUP_INDEX_VAL = 2; PARSE_MUTABLE = true; text = parent.getText().toString().replace(" ", " "); p = Pattern.compile(TOOLTIP_RATING_NON_RANDOM); //</editor-fold> } if (content.matches("<!--ee-->")) { //<editor-fold defaultstate="collapsed" desc="Permanent enchant effects."> // e.g.: "+190 Attack Power and +55 Critical Strike Rating" GROUP_INDEX_KEY = 2; GROUP_INDEX_VAL = 1; PARSE_MUTABLE = false; text = parent.getText().toString(); p = Pattern.compile(TOOLTIP_RATING_RANDOM_OR_BONUS); //</editor-fold> } } if (current instanceof ContentNode) { final ContentNode c = (ContentNode) current; final String content = c.getContent().toString(); if ("q1".equals(parent.getAttributeByName("class"))) { //<editor-fold defaultstate="collapsed" desc="Secondary Stats for Random Itemization Pieces."> // e.g.: "+168 Critical Strike Rating" GROUP_INDEX_KEY = 2; GROUP_INDEX_VAL = 1; PARSE_MUTABLE = true; text = content; p = Pattern.compile(TOOLTIP_RATING_RANDOM_OR_BONUS); //</editor-fold> } if (parent.getAttributeByName("class") != null && parent.getAttributeByName("class").startsWith("socket-")) { //<editor-fold defaultstate="collapsed" desc="Secondary Stats for gems."> // e.g.: "+20 Agility and +20 Mastery Rating" GROUP_INDEX_KEY = 2; GROUP_INDEX_VAL = 1; PARSE_MUTABLE = false; text = content; p = Pattern.compile(TOOLTIP_RATING_RANDOM_OR_BONUS); //</editor-fold> } if 
(content.startsWith("Socket Bonus") && "q2".equals(parent.getAttributeByName("class"))) { //<editor-fold defaultstate="collapsed" desc="Secondary Stats for Socket Bonuses."> // e.g.: "Socket Bonus: +30 Haste" GROUP_INDEX_KEY = 2; GROUP_INDEX_VAL = 1; PARSE_MUTABLE = false; text = content; p = Pattern.compile(TOOLTIP_RATING_RANDOM_OR_BONUS); //</editor-fold> } } if (text == null) { // Didn't match any known constructs. return true; } m = p.matcher(StringEscapeUtils.unescapeHtml4(text)); while (m.find()) { key = null; value = Integer.parseInt(m.group(GROUP_INDEX_VAL)); for (Stat s : Stat.values()) { if (s.shortName().equalsIgnoreCase(m.group(GROUP_INDEX_KEY))) { key = s; } } // TODO: Refactor output to parse() via mutable / immutable extension to StatKVMap. if (key != null) { if (PARSE_MUTABLE) { _mutableStats = _mutableStats.add(new StatKVPair(key, value)); System.out.println(String.format(" %+5d", value) + " " + key.shortName()); } else { _immutableStats = _immutableStats.add(new StatKVPair(key, value)); System.out.println(String.format(" %+5d", value) + " " + key.shortName() + " [Immutable]"); } } } return true; }
From source file:fr.mcc.ginco.rest.services.ThesaurusRestService.java
/** * Public method used to archive thesaurus * * @throws IOException//w w w . j a v a 2 s . c o m */ @GET @Path("/archiveVocabulary") @Consumes({ MediaType.APPLICATION_JSON }) @Produces(MediaType.TEXT_HTML) @PreAuthorize("hasPermission(#thesaurusId, '0')") public String archiveVocabulary(@QueryParam("thesaurusId") String thesaurusId) throws IOException { Thesaurus object = thesaurusService.getThesaurusById(thesaurusId); ThesaurusView view = null; if (object != null) { Thesaurus result = thesaurusService.archiveThesaurus(object); if (result != null) { view = thesaurusViewConverter.convert(result); //Update vocabulary date thesaurusService.updateThesaurusDate(object); } else { logger.error("Failed to archive thesaurus"); } } ObjectMapper mapper = new ObjectMapper(); String serialized = mapper.writeValueAsString(new ExtJsonFormLoadData(view)); return StringEscapeUtils.unescapeHtml4(serialized); }
From source file:de.uni_potsdam.hpi.bpt.promnicat.importer.npb.NPBImporter.java
/** * Parses all meta data from the given {@link Element}'s 'metaInformation' tags. * //from ww w . j a v a2 s . co m * @param rootElement the root element of the XML to consider for parsing * @return all parsed meta data */ private Map<String, Collection<String>> parseMetaData(Element rootElement) { Map<String, Collection<String>> metaData = new HashMap<String, Collection<String>>(); List<?> metaDataElements = rootElement.getChildren(KEY_META_INFORMATION, rootElement.getNamespace()); for (Object metaDataElement : metaDataElements) { if (metaDataElement instanceof Element && ((Element) metaDataElement).getName().equals(KEY_META_INFORMATION)) { List<?> metaInformation = ((Element) metaDataElement).getChildren(KEY_SCHLUESSEL, rootElement.getNamespace()); if (!metaInformation.isEmpty()) { String key = ((Element) metaInformation.get(0)).getChild(KEY_CODE).getText(); metaInformation = ((Element) metaDataElement).getChildren(KEY_WERT, rootElement.getNamespace()); Collection<String> values = new ArrayList<String>(); if (!metaInformation.isEmpty()) { Element valueElement = (Element) metaInformation.get(0); if (valueElement.getChild(KEY_LISTE, rootElement.getNamespace()) == null) { //single value Element value = ((Element) valueElement.getChildren().get(0)); values.add(StringEscapeUtils.unescapeHtml4(value.getText())); String name = value.getAttributeValue(KEY_NAME); if (name != null) { values.add(StringEscapeUtils.unescapeHtml4(name)); } } else { //parse all values List<?> valueElements = valueElement.getChild(KEY_LISTE, rootElement.getNamespace()) .getChildren(); for (Object value : valueElements) { values.add(StringEscapeUtils.unescapeHtml4(((Element) value).getText())); } } } metaData.put(key, values); } } } return metaData; }
From source file:de.akra.idocit.wsdl.services.DocumentationParser.java
/** * Reads the child nodes from the addressee element and converts the <br /> and * <tab /> elements to the corresponding ASCII characters. Unnecessary newlines * and tabulators are removed from the text. After the text is build * {@link String#trim()} is run on it./* w ww. j a va2 s.c om*/ * * @param addresseeElement * The addressee element from which the text should be read. * @return The normalized text. */ private static String readTextFromAddresseElement(Node addresseeElement) { StringBuilder text = new StringBuilder(); NodeList nodes = addresseeElement.getChildNodes(); for (int i = 0; i < nodes.getLength(); ++i) { Node node = nodes.item(i); switch (node.getNodeType()) { case Node.TEXT_NODE: String unescapedText = StringEscapeUtils.unescapeHtml4(node.getNodeValue()); text.append(StringUtils.cleanFormatting(unescapedText)); break; case Node.ELEMENT_NODE: if (node.getNodeName().equalsIgnoreCase(HTML_TAG_BR)) { text.append(System.getProperty("line.separator")); } else if (node.getNodeName().equalsIgnoreCase(HTML_TAG_TAB)) { text.append('\t'); } break; default: { // Do nothing! logger.info("The node-type is " + node.getNodeType()); } } } return text.toString().trim(); }
From source file:com.ryan.ryanreader.reddit.prepared.RedditPreparedPost.java
/**
 * Carries out the action that was chosen for a post.
 *
 * <p>The activity is taken from {@code fragmentParent}. Vote, save, hide and report actions
 * are forwarded to {@code post.action}; the remaining actions start activities, show
 * dialogs, download the post's image, fill the clipboard or notify the parent fragment,
 * which is cast to {@code RedditPostView.PostSelectionListener} for that purpose.
 *
 * @param post           the post the action applies to
 * @param fragmentParent the fragment that supplies the activity and, for some actions,
 *                       receives the selection callbacks
 * @param action         the action to perform
 */
public static void onActionMenuItemSelected(final RedditPreparedPost post, final Fragment fragmentParent,
        final Action action) {

    final Activity activity = fragmentParent.getSupportActivity();

    switch (action) {

    // Simple actions that are forwarded unchanged to the post
    case UPVOTE:
        post.action(activity, RedditAPI.RedditAction.UPVOTE);
        break;

    case DOWNVOTE:
        post.action(activity, RedditAPI.RedditAction.DOWNVOTE);
        break;

    case UNVOTE:
        post.action(activity, RedditAPI.RedditAction.UNVOTE);
        break;

    case SAVE:
        post.action(activity, RedditAPI.RedditAction.SAVE);
        break;

    case UNSAVE:
        post.action(activity, RedditAPI.RedditAction.UNSAVE);
        break;

    case HIDE:
        post.action(activity, RedditAPI.RedditAction.HIDE);
        break;

    case UNHIDE:
        post.action(activity, RedditAPI.RedditAction.UNHIDE);
        break;

    // Reporting asks for confirmation first; only the positive button performs the action
    case REPORT:
        new AlertDialog.Builder(activity).setTitle(R.string.action_report)
                .setMessage(R.string.action_report_sure)
                .setPositiveButton(R.string.action_report, new DialogInterface.OnClickListener() {
                    public void onClick(final DialogInterface dialog, final int which) {
                        post.action(activity, RedditAPI.RedditAction.REPORT);
                        // TODO update the view to show the result
                        // TODO don't forget, this also hides
                    }
                }).setNegativeButton(R.string.dialog_cancel, null).show();
        break;

    // Hands the post URL to whatever application handles ACTION_VIEW
    case EXTERNAL: {
        final Intent intent = new Intent(Intent.ACTION_VIEW);
        intent.setData(Uri.parse(post.url));
        activity.startActivity(intent);
        break;
    }

    // Lists the links found in the HTML-unescaped self text; a toast is shown if there are none
    case SELFTEXT_LINKS: {
        final HashSet<String> linksInComment = LinkHandler
                .computeAllLinks(StringEscapeUtils.unescapeHtml4(post.src.selftext));

        if (linksInComment.isEmpty()) {
            General.quickToast(activity, R.string.error_toast_no_urls_in_self);
        } else {
            final String[] linksArr = linksInComment.toArray(new String[linksInComment.size()]);

            final AlertDialog.Builder builder = new AlertDialog.Builder(activity);
            builder.setItems(linksArr, new DialogInterface.OnClickListener() {
                public void onClick(DialogInterface dialog, int which) {
                    // Open the chosen link, then close the list
                    LinkHandler.onLinkClicked(activity, linksArr[which], false, post.src);
                    dialog.dismiss();
                }
            });

            final AlertDialog alert = builder.create();
            alert.setTitle(R.string.action_selftext_links);
            alert.setCanceledOnTouchOutside(true);
            alert.show();
        }
        break;
    }

    // Requests the post's image through the cache manager and copies it to the public
    // pictures directory once it is available
    case SAVE_IMAGE: {
        final RedditAccount anon = RedditAccountManager.getAnon();

        // NOTE(review): "context" below is not declared in this method; presumably it is a
        // member inherited from CacheRequest. Confirm against that class.
        CacheManager.getInstance(activity)
                .makeRequest(new CacheRequest(General.uriFromString(post.imageUrl), anon, null,
                        Constants.Priority.IMAGE_VIEW, 0, CacheRequest.DownloadType.IF_NECESSARY,
                        Constants.FileType.IMAGE, false, false, false, activity) {

                    @Override
                    protected void onCallbackException(Throwable t) {
                        BugReportActivity.handleGlobalError(context, t);
                    }

                    @Override
                    protected void onDownloadNecessary() {
                        General.quickToast(context, R.string.download_downloading);
                    }

                    @Override
                    protected void onDownloadStarted() {
                    }

                    @Override
                    protected void onFailure(RequestFailureType type, Throwable t, StatusLine status,
                            String readableMessage) {
                        final RRError error = General.getGeneralErrorForFailure(context, type, t, status);
                        General.showResultDialog(activity, error);
                    }

                    @Override
                    protected void onProgress(long bytesRead, long totalBytes) {
                    }

                    @Override
                    protected void onSuccess(CacheManager.ReadableCacheFile cacheFile, long timestamp,
                            UUID session, boolean fromCache, String mimetype) {

                        // Destination: pictures directory + path part of the image URL
                        File dst = new File(
                                Environment.getExternalStoragePublicDirectory(Environment.DIRECTORY_PICTURES),
                                General.uriFromString(post.imageUrl).getPath());

                        // If the name is taken, prefix it with "1_", "2_", ... until it is free.
                        // substring(1) drops the first character of the URL path before prefixing.
                        if (dst.exists()) {
                            int count = 0;

                            while (dst.exists()) {
                                count++;
                                dst = new File(
                                        Environment.getExternalStoragePublicDirectory(
                                                Environment.DIRECTORY_PICTURES),
                                        count + "_" + General.uriFromString(post.imageUrl).getPath().substring(1));
                            }
                        }

                        try {
                            General.copyFile(cacheFile.getInputStream(), dst);
                        } catch (IOException e) {
                            // Report the copy problem as a storage failure and stop here
                            notifyFailure(RequestFailureType.STORAGE, e, null, "Could not copy file");
                            return;
                        }

                        // Announce the new file via a media scanner broadcast, then tell the user
                        activity.sendBroadcast(new Intent(Intent.ACTION_MEDIA_SCANNER_SCAN_FILE,
                                Uri.parse("file://" + dst.getAbsolutePath())));

                        General.quickToast(context,
                                context.getString(R.string.action_save_image_success) + " " + dst.getAbsolutePath());
                    }
                });
        break;
    }

    // Shares the post URL as plain text, with the post title as subject
    case SHARE: {
        final Intent mailer = new Intent(Intent.ACTION_SEND);
        mailer.setType("text/plain");
        mailer.putExtra(Intent.EXTRA_SUBJECT, post.title);
        mailer.putExtra(Intent.EXTRA_TEXT, post.url);
        activity.startActivity(Intent.createChooser(mailer, activity.getString(R.string.action_share)));
        break;
    }

    // Shares the URI of the post's comments page as plain text
    case SHARE_COMMENTS: {
        final Intent mailer = new Intent(Intent.ACTION_SEND);
        mailer.setType("text/plain");
        mailer.putExtra(Intent.EXTRA_SUBJECT, "Comments for " + post.title);
        mailer.putExtra(Intent.EXTRA_TEXT,
                Constants.Reddit.getUri(Constants.Reddit.PATH_COMMENTS + post.idAlone).toString());
        activity.startActivity(
                Intent.createChooser(mailer, activity.getString(R.string.action_share_comments)));
        break;
    }

    // Puts the post URL on the clipboard
    case COPY: {
        ClipboardManager manager = (ClipboardManager) activity.getSystemService(Context.CLIPBOARD_SERVICE);
        manager.setText(post.url);
        break;
    }

    // Opens the post listing for the subreddit the post belongs to
    case GOTO_SUBREDDIT: {
        final RedditSubreddit subreddit = new RedditSubreddit("/r/" + post.src.subreddit,
                "/r/" + post.src.subreddit, true);

        final Intent intent = new Intent(activity, PostListingActivity.class);
        intent.putExtra("subreddit", subreddit);
        activity.startActivityForResult(intent, 1);
        break;
    }

    case USER_PROFILE:
        UserProfileDialog.newInstance(post.src.author).show(activity);
        break;

    case PROPERTIES:
        PostPropertiesDialog.newInstance(post.src).show(activity);
        break;

    // Selection callbacks on the parent fragment
    case COMMENTS:
        ((RedditPostView.PostSelectionListener) fragmentParent).onPostCommentsSelected(post);
        break;

    case LINK:
        ((RedditPostView.PostSelectionListener) fragmentParent).onPostSelected(post);
        break;

    // The *_SWITCH variants first finish the activity unless it is the MainActivity
    case COMMENTS_SWITCH:
        if (!(activity instanceof MainActivity))
            activity.finish();
        ((RedditPostView.PostSelectionListener) fragmentParent).onPostCommentsSelected(post);
        break;

    case LINK_SWITCH:
        if (!(activity instanceof MainActivity))
            activity.finish();
        ((RedditPostView.PostSelectionListener) fragmentParent).onPostSelected(post);
        break;

    case ACTION_MENU:
        showActionMenu(activity, fragmentParent, post);
        break;

    // Starts the reply activity, passing the post's id and type as the parent
    case REPLY:
        final Intent intent = new Intent(activity, CommentReplyActivity.class);
        intent.putExtra("parentIdAndType", post.idAndType);
        activity.startActivity(intent);
        break;
    }
}
From source file:com.github.hronom.scrape.dat.website.controllers.ScrapeButtonController.java
public void processByJxBrowser() { // Disable fields in view. scrapeView.setWebsiteUrlTextFieldEnabled(false); scrapeView.setSelectorTextFieldEnabled(false); scrapeView.setScrapeButtonEnabled(false); scrapeView.setWorkInProgress(true);//from w ww . ja v a2 s . c om scrapeView.setOutput(""); scrapeView.setProgressBarTaskText("initializing"); logger.info("Start processing..."); long beginTime = System.currentTimeMillis(); // Output input parameters. if (!scrapeView.getWebsiteUrl().isEmpty() && !scrapeView.getSelector().isEmpty()) { logger.info("Input parameters: \"" + scrapeView.getWebsiteUrl() + "\", \"" + scrapeView.getSelector() + "\", \""); } // Navigate to blank page. scrapeView.setProgressBarTaskText("requesting page"); logger.info("Requesting page..."); browser.loadURL(scrapeView.getWebsiteUrl()); // Wait for loading. while (browser.isLoading()) { try { Thread.sleep(1000); } catch (InterruptedException e) { e.printStackTrace(); } } logger.info("Requesting of page completed."); scrapeView.setProgressBarTaskText("viewing page as HTML"); logger.info("View page as HTML"); String html = browser.getHTML(); // Unescape html. 
scrapeView.setProgressBarTaskText("unescaping HTML"); logger.info("Unescape html"); html = StringEscapeUtils.unescapeHtml4(html); logger.info("Get selector"); String selector = scrapeView.getSelector(); if (!html.isEmpty() && !selector.isEmpty()) { scrapeView.setProgressBarTaskText("parsing HTML"); logger.info("Parse HTML"); Document doc = Jsoup.parse(html); scrapeView.setProgressBarTaskText("selecting elements in HTML"); logger.info("select elements in HTML"); Elements selectedElements = doc.select(selector); if (!selectedElements.isEmpty()) { scrapeView.setProgressBarTaskText("parsing selected elements"); logger.info("Parse extracted elements"); StringBuilder sb = new StringBuilder(); for (Element element : selectedElements) { String body = element.html(); sb.append(body); sb.append("\n"); sb.append("\n"); } scrapeView.setOutput(sb.toString()); } } browser.stop(); long endTime = System.currentTimeMillis(); logger.info("Process time: " + (endTime - beginTime) + " ms."); logger.info("Processing complete."); // Enable fields in view. scrapeView.setWorkInProgress(false); scrapeView.setScrapeButtonEnabled(true); scrapeView.setSelectorTextFieldEnabled(true); scrapeView.setWebsiteUrlTextFieldEnabled(true); }
From source file:com.romeikat.datamessie.core.rss.task.rssCrawling.SourceCrawler.java
/**
 * Returns the title of a feed entry with HTML 4 entity escapes resolved.
 *
 * @param entry the feed entry whose title is read
 * @return the result of unescaping {@code entry.getTitle()}
 */
private String getTitle(final SyndEntry entry) {
    final String rawTitle = entry.getTitle();
    return StringEscapeUtils.unescapeHtml4(rawTitle);
}