Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeHtml4.

Prototype

public static final String unescapeHtml4(final String input) 

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:com.chiorichan.util.WebFunc.java

/**
 * Converts HTML entity escapes in the given text back into the characters they represent.
 *
 * @param l the text that may contain HTML 4 entity escapes
 * @return the result of {@code StringEscapeUtils.unescapeHtml4} applied to {@code l}
 */
public static String unescapeHTML(String l) {
    final String unescaped = StringEscapeUtils.unescapeHtml4(l);
    return unescaped;
}

From source file:elh.eus.absa.CorpusReader.java

/**
 * Stores a sentence under the given id after unescaping its text.
 * The text is first passed through {@code unescapeJava} and the result through
 * {@code unescapeHtml4}.
 *
 * @param id   key under which the sentence is stored
 * @param text raw sentence text, possibly containing Java and HTML escapes
 */
public void addSentence(String id, String text) {
    String withoutJavaEscapes = StringEscapeUtils.unescapeJava(text);
    String plainText = StringEscapeUtils.unescapeHtml4(withoutJavaEscapes);
    this.sentences.put(id, plainText);
}

From source file:com.romeikat.datamessie.core.rss.task.maintenance.MaintenanceTask.java

/**
 * Unescapes HTML entities in the title and description of all documents, one batch of
 * document IDs at a time. A document is written back only when its title or description
 * actually changed. {@code taskExecution.checkpoint()} is called after each batch.
 *
 * @param taskExecution the task execution whose checkpoint is invoked between batches
 * @throws TaskCancelledException declared by this method; presumably raised by the checkpoint
 *         when the task was cancelled (TODO confirm)
 */
private void unescapeHtmlCharsFromDocument(final TaskExecution taskExecution) throws TaskCancelledException {
    // Fetch the IDs of all documents, then release the session used for the lookup
    final HibernateSessionProvider idSessionProvider = new HibernateSessionProvider(sessionFactory);
    final List<Long> allIds = documentDao.getIds(idSessionProvider.getStatelessSession());
    idSessionProvider.closeStatelessSession();

    // Work through the IDs batch by batch
    for (final List<Long> idBatch : CollectionUtil.splitIntoSubListsBySize(allIds, batchSize)) {
        new ParallelProcessing<Long>(sessionFactory, idBatch) {
            @Override
            public void doProcessing(final HibernateSessionProvider sessionProvider, final Long documentId) {
                final Document document = documentDao.getEntity(sessionProvider.getStatelessSession(),
                        documentId);
                if (document == null) {
                    return;
                }

                // Title
                final String oldTitle = document.getTitle();
                final String newTitle = StringEscapeUtils.unescapeHtml4(oldTitle);
                document.setTitle(newTitle);

                // Description
                final String oldDescription = document.getDescription();
                final String newDescription = StringEscapeUtils.unescapeHtml4(oldDescription);
                document.setDescription(newDescription);

                // A field counts as unchanged when either value is null or both are equal
                final boolean titleUnchanged = oldTitle == null || newTitle == null
                        || oldTitle.equals(newTitle);
                final boolean descriptionUnchanged = oldDescription == null || newDescription == null
                        || oldDescription.equals(newDescription);
                if (titleUnchanged && descriptionUnchanged) {
                    return;
                }

                documentDao.update(sessionProvider.getStatelessSession(), document);
                LOG.info("Unescaped HTML characters from document {}", documentId);
            }
        };
        taskExecution.checkpoint();
    }
}

From source file:com.glowinteractive.reforger.Item.java

/**
 * Inspects one node of an item tooltip and records the stat values found in it.
 * <p>
 * Depending on the node type and the parent's {@code class} attribute, a text, a pattern and
 * the capture-group positions of stat name and value are chosen. Every match whose stat name
 * equals (ignoring case) the short name of a {@link Stat} is added to the mutable or the
 * immutable stat collection and printed to standard output.
 *
 * @param parent  the tag that contains {@code current}
 * @param current the node being visited
 * @return always {@code true}
 */
@Override
public boolean visit(TagNode parent, HtmlNode current) {
    Pattern pattern = null;
    String tooltipText = null;
    boolean mutable = false;
    int keyGroup = -1;
    int valueGroup = -1;

    if (current instanceof CommentNode) {
        final String comment = ((CommentNode) current).getCommentedContent();

        if (comment.matches("<!--rtg\\d\\d-->")) {
            // Secondary stats for non-random itemization pieces,
            //   e.g.: "Equip: Increases your critical strike rating by 168"
            keyGroup = 1;
            valueGroup = 2;
            mutable = true;
            tooltipText = parent.getText().toString().replace("&nbsp;", " ");
            pattern = Pattern.compile(TOOLTIP_RATING_NON_RANDOM);
        }

        if (comment.matches("<!--ee-->")) {
            // Permanent enchant effects,
            //   e.g.: "+190 Attack Power and +55 Critical Strike Rating"
            keyGroup = 2;
            valueGroup = 1;
            mutable = false;
            tooltipText = parent.getText().toString();
            pattern = Pattern.compile(TOOLTIP_RATING_RANDOM_OR_BONUS);
        }
    }

    if (current instanceof ContentNode) {
        final String content = ((ContentNode) current).getContent().toString();
        final String cssClass = parent.getAttributeByName("class");

        // The three class checks below cannot hold at the same time ("q1", "socket-…", "q2").
        if ("q1".equals(cssClass)) {
            // Secondary stats for random itemization pieces,
            //   e.g.: "+168 Critical Strike Rating"
            keyGroup = 2;
            valueGroup = 1;
            mutable = true;
            tooltipText = content;
            pattern = Pattern.compile(TOOLTIP_RATING_RANDOM_OR_BONUS);
        } else if (cssClass != null && cssClass.startsWith("socket-")) {
            // Secondary stats for gems,
            //   e.g.: "+20 Agility and +20 Mastery Rating"
            keyGroup = 2;
            valueGroup = 1;
            mutable = false;
            tooltipText = content;
            pattern = Pattern.compile(TOOLTIP_RATING_RANDOM_OR_BONUS);
        } else if (content.startsWith("Socket Bonus") && "q2".equals(cssClass)) {
            // Secondary stats for socket bonuses,
            //   e.g.: "Socket Bonus: +30 Haste"
            keyGroup = 2;
            valueGroup = 1;
            mutable = false;
            tooltipText = content;
            pattern = Pattern.compile(TOOLTIP_RATING_RANDOM_OR_BONUS);
        }
    }

    if (tooltipText == null) {
        // Didn't match any known constructs.
        return true;
    }

    final Matcher matcher = pattern.matcher(StringEscapeUtils.unescapeHtml4(tooltipText));

    while (matcher.find()) {
        final int amount = Integer.parseInt(matcher.group(valueGroup));

        // No early exit: if several stats share the short name, the last one is used.
        Stat matchedStat = null;
        for (Stat candidate : Stat.values()) {
            if (candidate.shortName().equalsIgnoreCase(matcher.group(keyGroup))) {
                matchedStat = candidate;
            }
        }

        if (matchedStat == null) {
            continue;
        }

        // TODO: Refactor output to parse() via mutable / immutable extension to StatKVMap.
        final StatKVPair pair = new StatKVPair(matchedStat, amount);
        if (mutable) {
            _mutableStats = _mutableStats.add(pair);
            System.out.println(String.format("    %+5d", amount) + " " + matchedStat.shortName());
        } else {
            _immutableStats = _immutableStats.add(pair);
            System.out.println(String.format("    %+5d", amount) + " " + matchedStat.shortName() + " [Immutable]");
        }
    }

    return true;
}

From source file:fr.mcc.ginco.rest.services.ThesaurusRestService.java

/**
 * Archives the thesaurus with the given id.
 * <p>
 * When the thesaurus exists and archiving returns a result, that result is converted to a
 * view and the thesaurus date is updated. The view ({@code null} if the thesaurus was not
 * found or archiving failed) is wrapped in an {@code ExtJsonFormLoadData}, serialized with an
 * {@code ObjectMapper}, and the serialized text is HTML-unescaped before being returned.
 *
 * @param thesaurusId id of the thesaurus to archive
 * @return the serialized, HTML-unescaped form-load data
 * @throws IOException declared for the serialization step
 */
@GET
@Path("/archiveVocabulary")
@Consumes({ MediaType.APPLICATION_JSON })
@Produces(MediaType.TEXT_HTML)
@PreAuthorize("hasPermission(#thesaurusId, '0')")
public String archiveVocabulary(@QueryParam("thesaurusId") String thesaurusId) throws IOException {
    ThesaurusView view = null;

    final Thesaurus thesaurus = thesaurusService.getThesaurusById(thesaurusId);
    if (thesaurus != null) {
        final Thesaurus archived = thesaurusService.archiveThesaurus(thesaurus);
        if (archived == null) {
            logger.error("Failed to archive thesaurus");
        } else {
            view = thesaurusViewConverter.convert(archived);
            // Update vocabulary date
            thesaurusService.updateThesaurusDate(thesaurus);
        }
    }

    final String json = new ObjectMapper().writeValueAsString(new ExtJsonFormLoadData(view));
    return StringEscapeUtils.unescapeHtml4(json);
}

From source file:de.uni_potsdam.hpi.bpt.promnicat.importer.npb.NPBImporter.java

/**
 * Collects the meta data found in the 'metaInformation' children of the given {@link Element}.
 * <p>
 * For each such child that has a key element, the text of the key's code child becomes the
 * map key. The values are taken from the first value element: either the text of every entry
 * of its list child, or, without a list, the text of its first child followed by that child's
 * name attribute when present. All values are HTML-unescaped.
 *
 * @param rootElement the root element of the XML to consider for parsing
 * @return all parsed meta data, keyed by code
 */
private Map<String, Collection<String>> parseMetaData(Element rootElement) {
    Map<String, Collection<String>> result = new HashMap<String, Collection<String>>();

    for (Object child : rootElement.getChildren(KEY_META_INFORMATION, rootElement.getNamespace())) {
        if (!(child instanceof Element)) {
            continue;
        }
        Element metaElement = (Element) child;
        if (!metaElement.getName().equals(KEY_META_INFORMATION)) {
            continue;
        }

        List<?> keyElements = metaElement.getChildren(KEY_SCHLUESSEL, rootElement.getNamespace());
        if (keyElements.isEmpty()) {
            // no key, nothing to register
            continue;
        }
        String key = ((Element) keyElements.get(0)).getChild(KEY_CODE).getText();

        List<?> valueHolders = metaElement.getChildren(KEY_WERT, rootElement.getNamespace());
        Collection<String> values = new ArrayList<String>();
        if (!valueHolders.isEmpty()) {
            Element valueElement = (Element) valueHolders.get(0);
            Element list = valueElement.getChild(KEY_LISTE, rootElement.getNamespace());
            if (list != null) {
                // parse all values
                for (Object entry : list.getChildren()) {
                    values.add(StringEscapeUtils.unescapeHtml4(((Element) entry).getText()));
                }
            } else {
                // single value
                Element single = (Element) valueElement.getChildren().get(0);
                values.add(StringEscapeUtils.unescapeHtml4(single.getText()));
                String name = single.getAttributeValue(KEY_NAME);
                if (name != null) {
                    values.add(StringEscapeUtils.unescapeHtml4(name));
                }
            }
        }
        result.put(key, values);
    }
    return result;
}

From source file:de.akra.idocit.wsdl.services.DocumentationParser.java

/**
 * Reads the child nodes from the addressee element and converts the &lt;br /&gt; and
 * &lt;tab /&gt; elements to the corresponding ASCII characters. Unnecessary newlines
 * and tabulators are removed from the text. After the text is build
 * {@link String#trim()} is run on it./*  w  ww. j a  va2 s.c  om*/
 * 
 * @param addresseeElement
 *            The addressee element from which the text should be read.
 * @return The normalized text.
 */
private static String readTextFromAddresseElement(Node addresseeElement) {
    StringBuilder text = new StringBuilder();
    NodeList nodes = addresseeElement.getChildNodes();

    for (int i = 0; i < nodes.getLength(); ++i) {
        Node node = nodes.item(i);
        switch (node.getNodeType()) {
        case Node.TEXT_NODE:
            String unescapedText = StringEscapeUtils.unescapeHtml4(node.getNodeValue());
            text.append(StringUtils.cleanFormatting(unescapedText));
            break;
        case Node.ELEMENT_NODE:
            if (node.getNodeName().equalsIgnoreCase(HTML_TAG_BR)) {
                text.append(System.getProperty("line.separator"));
            } else if (node.getNodeName().equalsIgnoreCase(HTML_TAG_TAB)) {
                text.append('\t');
            }
            break;
        default: {
            // Do nothing!
            logger.info("The node-type is " + node.getNodeType());
        }
        }
    }
    return text.toString().trim();
}

From source file:com.ryan.ryanreader.reddit.prepared.RedditPreparedPost.java

/**
 * Performs the given menu action for a post.
 * <p>
 * Depending on the action this forwards to {@code post.action(...)}, starts an activity via an
 * {@link Intent}, shows a dialog, or notifies the parent fragment (which is cast to
 * {@code RedditPostView.PostSelectionListener} for the comment/link actions).
 *
 * @param post           the post the action applies to
 * @param fragmentParent the fragment that supplies the activity and receives selection callbacks
 * @param action         the action to perform
 */
public static void onActionMenuItemSelected(final RedditPreparedPost post, final Fragment fragmentParent,
        final Action action) {

    final Activity activity = fragmentParent.getSupportActivity();

    switch (action) {

    // Simple actions: forwarded directly to the post.
    case UPVOTE:
        post.action(activity, RedditAPI.RedditAction.UPVOTE);
        break;

    case DOWNVOTE:
        post.action(activity, RedditAPI.RedditAction.DOWNVOTE);
        break;

    case UNVOTE:
        post.action(activity, RedditAPI.RedditAction.UNVOTE);
        break;

    case SAVE:
        post.action(activity, RedditAPI.RedditAction.SAVE);
        break;

    case UNSAVE:
        post.action(activity, RedditAPI.RedditAction.UNSAVE);
        break;

    case HIDE:
        post.action(activity, RedditAPI.RedditAction.HIDE);
        break;

    case UNHIDE:
        post.action(activity, RedditAPI.RedditAction.UNHIDE);
        break;

    case REPORT:

        // Ask for confirmation first; the report is only sent from the positive button's handler.
        new AlertDialog.Builder(activity).setTitle(R.string.action_report)
                .setMessage(R.string.action_report_sure)
                .setPositiveButton(R.string.action_report, new DialogInterface.OnClickListener() {
                    public void onClick(final DialogInterface dialog, final int which) {
                        post.action(activity, RedditAPI.RedditAction.REPORT);
                        // TODO update the view to show the result
                        // TODO don't forget, this also hides
                    }
                }).setNegativeButton(R.string.dialog_cancel, null).show();

        break;

    case EXTERNAL: {
        // Hand the post URL to whatever handles ACTION_VIEW for it.
        final Intent intent = new Intent(Intent.ACTION_VIEW);
        intent.setData(Uri.parse(post.url));
        activity.startActivity(intent);
        break;
    }

    case SELFTEXT_LINKS: {

        // HTML entities in the self text are unescaped before links are extracted from it.
        final HashSet<String> linksInComment = LinkHandler
                .computeAllLinks(StringEscapeUtils.unescapeHtml4(post.src.selftext));

        if (linksInComment.isEmpty()) {
            // No links found: tell the user with a toast.
            General.quickToast(activity, R.string.error_toast_no_urls_in_self);

        } else {

            // Show the links as a list dialog; selecting one passes it to the link handler.
            final String[] linksArr = linksInComment.toArray(new String[linksInComment.size()]);

            final AlertDialog.Builder builder = new AlertDialog.Builder(activity);
            builder.setItems(linksArr, new DialogInterface.OnClickListener() {
                public void onClick(DialogInterface dialog, int which) {
                    LinkHandler.onLinkClicked(activity, linksArr[which], false, post.src);
                    dialog.dismiss();
                }
            });

            final AlertDialog alert = builder.create();
            alert.setTitle(R.string.action_selftext_links);
            alert.setCanceledOnTouchOutside(true);
            alert.show();
        }

        break;
    }

    case SAVE_IMAGE: {

        final RedditAccount anon = RedditAccountManager.getAnon();

        // Request the image through the cache manager; the callbacks below react to the outcome.
        CacheManager.getInstance(activity)
                .makeRequest(new CacheRequest(General.uriFromString(post.imageUrl), anon, null,
                        Constants.Priority.IMAGE_VIEW, 0, CacheRequest.DownloadType.IF_NECESSARY,
                        Constants.FileType.IMAGE, false, false, false, activity) {

                    @Override
                    protected void onCallbackException(Throwable t) {
                        BugReportActivity.handleGlobalError(context, t);
                    }

                    @Override
                    protected void onDownloadNecessary() {
                        General.quickToast(context, R.string.download_downloading);
                    }

                    @Override
                    protected void onDownloadStarted() {
                    }

                    @Override
                    protected void onFailure(RequestFailureType type, Throwable t, StatusLine status,
                            String readableMessage) {
                        final RRError error = General.getGeneralErrorForFailure(context, type, t, status);
                        General.showResultDialog(activity, error);
                    }

                    @Override
                    protected void onProgress(long bytesRead, long totalBytes) {
                    }

                    @Override
                    protected void onSuccess(CacheManager.ReadableCacheFile cacheFile, long timestamp,
                            UUID session, boolean fromCache, String mimetype) {

                        // Target file: public Pictures directory plus the path part of the image URL.
                        File dst = new File(
                                Environment.getExternalStoragePublicDirectory(Environment.DIRECTORY_PICTURES),
                                General.uriFromString(post.imageUrl).getPath());

                        if (dst.exists()) {
                            int count = 0;

                            // Prefix the name with an increasing counter until an unused name is found.
                            // substring(1) drops the first character of the URL path
                            // (presumably its leading '/' - TODO confirm).
                            while (dst.exists()) {
                                count++;
                                dst = new File(
                                        Environment.getExternalStoragePublicDirectory(
                                                Environment.DIRECTORY_PICTURES),
                                        count + "_"
                                                + General.uriFromString(post.imageUrl).getPath().substring(1));
                            }
                        }

                        try {
                            General.copyFile(cacheFile.getInputStream(), dst);
                        } catch (IOException e) {
                            // Report the copy problem as a storage failure and stop here.
                            notifyFailure(RequestFailureType.STORAGE, e, null, "Could not copy file");
                            return;
                        }

                        // Ask the media scanner to scan the newly written file.
                        activity.sendBroadcast(new Intent(Intent.ACTION_MEDIA_SCANNER_SCAN_FILE,
                                Uri.parse("file://" + dst.getAbsolutePath())));

                        General.quickToast(context, context.getString(R.string.action_save_image_success) + " "
                                + dst.getAbsolutePath());
                    }
                });

        break;
    }

    case SHARE: {

        // Share the post: title as subject, URL as text, via a chooser.
        final Intent mailer = new Intent(Intent.ACTION_SEND);
        mailer.setType("text/plain");
        mailer.putExtra(Intent.EXTRA_SUBJECT, post.title);
        mailer.putExtra(Intent.EXTRA_TEXT, post.url);
        activity.startActivity(Intent.createChooser(mailer, activity.getString(R.string.action_share)));
        break;
    }

    case SHARE_COMMENTS: {

        // Same as SHARE, but the shared text is the URI of the post's comments page.
        final Intent mailer = new Intent(Intent.ACTION_SEND);
        mailer.setType("text/plain");
        mailer.putExtra(Intent.EXTRA_SUBJECT, "Comments for " + post.title);
        mailer.putExtra(Intent.EXTRA_TEXT,
                Constants.Reddit.getUri(Constants.Reddit.PATH_COMMENTS + post.idAlone).toString());
        activity.startActivity(
                Intent.createChooser(mailer, activity.getString(R.string.action_share_comments)));
        break;
    }

    case COPY: {

        // Put the post URL on the clipboard.
        ClipboardManager manager = (ClipboardManager) activity.getSystemService(Context.CLIPBOARD_SERVICE);
        manager.setText(post.url);
        break;
    }

    case GOTO_SUBREDDIT: {

        // Open the post listing for the subreddit this post belongs to.
        final RedditSubreddit subreddit = new RedditSubreddit("/r/" + post.src.subreddit,
                "/r/" + post.src.subreddit, true);

        final Intent intent = new Intent(activity, PostListingActivity.class);
        intent.putExtra("subreddit", subreddit);
        activity.startActivityForResult(intent, 1);
        break;
    }

    case USER_PROFILE:
        UserProfileDialog.newInstance(post.src.author).show(activity);
        break;

    case PROPERTIES:
        PostPropertiesDialog.newInstance(post.src).show(activity);
        break;

    case COMMENTS:
        ((RedditPostView.PostSelectionListener) fragmentParent).onPostCommentsSelected(post);
        break;

    case LINK:
        ((RedditPostView.PostSelectionListener) fragmentParent).onPostSelected(post);
        break;

    case COMMENTS_SWITCH:
        // Like COMMENTS, but the current activity is finished first unless it is the MainActivity.
        if (!(activity instanceof MainActivity))
            activity.finish();
        ((RedditPostView.PostSelectionListener) fragmentParent).onPostCommentsSelected(post);
        break;

    case LINK_SWITCH:
        // Like LINK, but the current activity is finished first unless it is the MainActivity.
        if (!(activity instanceof MainActivity))
            activity.finish();
        ((RedditPostView.PostSelectionListener) fragmentParent).onPostSelected(post);
        break;

    case ACTION_MENU:
        showActionMenu(activity, fragmentParent, post);
        break;

    case REPLY:
        // Start the reply activity, passing the post's id-and-type as the parent to reply to.
        final Intent intent = new Intent(activity, CommentReplyActivity.class);
        intent.putExtra("parentIdAndType", post.idAndType);
        activity.startActivity(intent);
        break;
    }
}

From source file:com.github.hronom.scrape.dat.website.controllers.ScrapeButtonController.java

/**
 * Loads the website entered in the view with the embedded browser, selects elements from the
 * page using the entered selector and shows their HTML in the view's output.
 * <p>
 * The input controls are disabled while processing. They are re-enabled in a {@code finally}
 * block, so the view becomes usable again even when loading, parsing or selecting throws
 * (for example on a malformed selector). The browser is likewise stopped in a {@code finally}
 * block once a page load has been requested.
 * <p>
 * If the thread is interrupted while waiting for the page to load, the interrupt status is
 * restored and processing is aborted without producing output.
 */
public void processByJxBrowser() {
    // Disable fields in view.
    scrapeView.setWebsiteUrlTextFieldEnabled(false);
    scrapeView.setSelectorTextFieldEnabled(false);
    scrapeView.setScrapeButtonEnabled(false);
    scrapeView.setWorkInProgress(true);
    scrapeView.setOutput("");

    try {
        scrapeView.setProgressBarTaskText("initializing");
        logger.info("Start processing...");
        long beginTime = System.currentTimeMillis();

        // Output input parameters.
        if (!scrapeView.getWebsiteUrl().isEmpty() && !scrapeView.getSelector().isEmpty()) {
            logger.info("Input parameters: \"" + scrapeView.getWebsiteUrl() + "\", \""
                    + scrapeView.getSelector() + "\"");
        }

        // Load the requested page.
        scrapeView.setProgressBarTaskText("requesting page");
        logger.info("Requesting page...");
        browser.loadURL(scrapeView.getWebsiteUrl());
        try {
            // Wait for loading.
            while (browser.isLoading()) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // Restore the flag for callers and stop; sleeping again would throw immediately.
                    Thread.currentThread().interrupt();
                    logger.info("Interrupted while waiting for the page to load, processing aborted.");
                    return;
                }
            }
            logger.info("Requesting of page completed.");

            scrapeView.setProgressBarTaskText("viewing page as HTML");
            logger.info("View page as HTML");
            String html = browser.getHTML();

            // Unescape html.
            // NOTE(review): unescaping the whole document before parsing turns escaped text such
            // as "&lt;b&gt;" into markup; kept because the original did so - confirm it is intended.
            scrapeView.setProgressBarTaskText("unescaping HTML");
            logger.info("Unescape html");
            html = StringEscapeUtils.unescapeHtml4(html);

            logger.info("Get selector");
            String selector = scrapeView.getSelector();
            // unescapeHtml4 returns null for null input, so guard before dereferencing.
            if (html != null && !html.isEmpty() && !selector.isEmpty()) {
                scrapeView.setProgressBarTaskText("parsing HTML");
                logger.info("Parse HTML");
                Document doc = Jsoup.parse(html);

                scrapeView.setProgressBarTaskText("selecting elements in HTML");
                logger.info("select elements in HTML");
                Elements selectedElements = doc.select(selector);

                if (!selectedElements.isEmpty()) {
                    scrapeView.setProgressBarTaskText("parsing selected elements");
                    logger.info("Parse extracted elements");
                    StringBuilder sb = new StringBuilder();
                    for (Element element : selectedElements) {
                        // Each element's inner HTML, followed by a blank line.
                        sb.append(element.html());
                        sb.append("\n");
                        sb.append("\n");
                    }
                    scrapeView.setOutput(sb.toString());
                }
            }
        } finally {
            browser.stop();
        }

        long endTime = System.currentTimeMillis();
        logger.info("Process time: " + (endTime - beginTime) + " ms.");
        logger.info("Processing complete.");
    } finally {
        // Enable fields in view, whatever happened above.
        scrapeView.setWorkInProgress(false);
        scrapeView.setScrapeButtonEnabled(true);
        scrapeView.setSelectorTextFieldEnabled(true);
        scrapeView.setWebsiteUrlTextFieldEnabled(true);
    }
}

From source file:com.romeikat.datamessie.core.rss.task.rssCrawling.SourceCrawler.java

/**
 * Returns the title of the given feed entry with HTML entity escapes unescaped.
 *
 * @param entry the feed entry whose title is read
 * @return the result of {@code StringEscapeUtils.unescapeHtml4} applied to the entry's title
 */
private String getTitle(final SyndEntry entry) {
    return StringEscapeUtils.unescapeHtml4(entry.getTitle());
}