Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeHtml4.

Prototype

public static final String unescapeHtml4(final String input) 

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:com.zestedesavoir.zestwriter.model.Content.java

/**
 * Writes this content to {@code file} as an HTML document encoded in UTF-8.
 *
 * <p>The Markdown produced by {@code exportContentToMarkdown} is converted through
 * {@code index.markdownToHtml}, HTML entities are unescaped, the result is passed through
 * {@code normalizeHtml} and {@code addHeaderAndFooterStrict}, and then written out.
 * Any exception is logged and not rethrown.
 *
 * @param file  destination file
 * @param index controller used for the Markdown-to-HTML conversion
 */
public void saveToHtml(File file, MdTextController index) {
    try (FileOutputStream out = new FileOutputStream(file)) {
        BufferedWriter htmlWriter = new BufferedWriter(new OutputStreamWriter(out, "UTF8"));
        String markdown = exportContentToMarkdown(0, getDepth());
        String renderedHtml = index.markdownToHtml(markdown);
        String html = normalizeHtml(StringEscapeUtils.unescapeHtml4(renderedHtml));
        String page = MainApp.getMdUtils().addHeaderAndFooterStrict(html, getTitle());
        htmlWriter.append(page);
        // Push buffered characters to the stream before try-with-resources closes it.
        htmlWriter.flush();
    } catch (Exception e) {
        MainApp.getLogger().error(e.getMessage(), e);
    }
}

From source file:com.wellsandwhistles.android.redditsp.image.ImageInfo.java

/**
 * Builds an {@link ImageInfo} from a JSON object (presumably an Imgur API v3 image entry — confirm
 * against the caller).
 *
 * <p>When the object has an {@code "mp4"} entry, that URL and {@code "mp4_size"} are used and the
 * media type is {@code VIDEO}; otherwise the {@code "link"} URL is used and the type is {@code IMAGE}.
 * Title and description are HTML-unescaped. A {@code null} object yields an {@code ImageInfo}
 * whose fields are all {@code null}/{@code false}.
 *
 * @param object the JSON object to read, may be {@code null}
 * @return the parsed image information
 * @throws IOException          declared by the underlying JSON accessors
 * @throws InterruptedException declared by the underlying JSON accessors
 */
public static ImageInfo parseImgurV3(final JsonBufferedObject object) throws IOException, InterruptedException {

    String imageId = null;
    String mediaUrl = null;
    String rawTitle = null;
    String rawCaption = null;
    String mimeType = null;
    boolean animated = false;
    Long pixelWidth = null;
    Long pixelHeight = null;
    Long byteSize = null;
    boolean isVideo = false;

    if (object != null) {
        imageId = object.getString("id");
        rawTitle = object.getString("title");
        rawCaption = object.getString("description");
        mimeType = object.getString("type");
        animated = object.getBoolean("animated");
        pixelWidth = object.getLong("width");
        pixelHeight = object.getLong("height");
        byteSize = object.getLong("size");

        final String mp4Url = object.getString("mp4");
        isVideo = mp4Url != null;
        if (isVideo) {
            mediaUrl = mp4Url;
            // The video variant has its own size entry, which replaces the image size.
            byteSize = object.getLong("mp4_size");
        } else {
            mediaUrl = object.getString("link");
        }
    }

    final String title = rawTitle == null ? null : StringEscapeUtils.unescapeHtml4(rawTitle);
    final String caption = rawCaption == null ? null : StringEscapeUtils.unescapeHtml4(rawCaption);
    final String thumbnailUrl = imageId == null ? null : "https://i.imgur.com/" + imageId + "b.jpg";
    final MediaType mediaType = isVideo ? MediaType.VIDEO : MediaType.IMAGE;

    return new ImageInfo(mediaUrl, thumbnailUrl, title, caption, mimeType, animated, pixelWidth, pixelHeight,
            byteSize, mediaType);
}

From source file:com.navercorp.pinpoint.web.controller.BusinessTransactionController.java

/**
 * Handles {@code POST /bind}: combines {@code metaData} with the bind values parsed from {@code bind}.
 *
 * <p>For type {@code "sql"} the SQL parser pair is used and the result is HTML-escaped; for type
 * {@code "mongoJson"} the JSON parser pair is used and the result is HTML-unescaped. Any other
 * type yields the escaped empty string.
 *
 * @param type     kind of statement, {@code "sql"} or {@code "mongoJson"}
 * @param metaData statement text to combine the bind values into
 * @param bind     raw bind values
 * @return the combined statement, or an empty string when {@code metaData} is {@code null}
 */
@RequestMapping(value = "/bind", method = RequestMethod.POST)
@ResponseBody
public String metaDataBind(@RequestParam("type") String type, @RequestParam("metaData") String metaData,
        @RequestParam("bind") String bind) {
    final boolean debugEnabled = logger.isDebugEnabled();
    if (debugEnabled) {
        logger.debug("POST /bind params {metaData={}, bind={}}", metaData, bind);
    }

    if (metaData == null) {
        return "";
    }

    final boolean sqlType = type.equals("sql");
    final boolean mongoJsonType = !sqlType && type.equals("mongoJson");

    String combined = "";
    if (sqlType) {
        List<String> values = parameterParser.parseOutputParameter(bind);
        combined = sqlParser.combineBindValues(metaData, values);
    } else if (mongoJsonType) {
        List<String> values = parameterJsonParser.parseOutputParameter(bind);
        combined = mongoJsonParser.combineBindValues(metaData, values);
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Combined result={}", combined);
    }

    return mongoJsonType
            ? StringEscapeUtils.unescapeHtml4(combined)
            : StringEscapeUtils.escapeHtml4(combined);
}

From source file:com.nttec.everychan.chans.makaba.MakabaJsonMapper.java

/**
 * Maps a JSON post object to a {@link PostModel}.
 *
 * @param source    JSON object describing one post
 * @param boardName board name, forwarded to {@code mapAttachmentModel} for each attached file
 * @return the populated model
 * @throws JSONException if a mandatory entry ({@code "num"}, {@code "timestamp"}, ...) cannot be read
 */
static PostModel mapPostModel(JSONObject source, String boardName) throws JSONException {
    PostModel post = new PostModel();

    // "num" is read as a string first; if that fails it is read as a number instead.
    try {
        post.number = source.getString("num");
    } catch (JSONException e) {
        post.number = Long.toString(source.getLong("num"));
    }

    String rawName = getStringSafe(source, "name", "");
    post.name = StringEscapeUtils.unescapeHtml4(RegexUtils.removeHtmlSpanTags(rawName));
    post.subject = StringEscapeUtils.unescapeHtml4(getStringSafe(source, "subject", ""));
    post.comment = getStringSafe(source, "comment", "");

    String email = getStringSafe(source, "email", "");
    // Drop the 7-character "mailto:" scheme prefix when present.
    post.email = email.startsWith("mailto:") ? email.substring(7) : email;

    // Special tripcode markers are replaced by display labels.
    // NOTE(review): the "##  ##" label looks like it lost non-ASCII text — confirm against upstream.
    String trip = getStringSafe(source, "trip", "");
    if (trip != null) {
        if (trip.equals("!!%adm%!!")) {
            trip = "## Abu ##";
        } else if (trip.equals("!!%mod%!!")) {
            trip = "## Mod ##";
        } else if (trip.equals("!!%Inquisitor%!!")) {
            trip = "## Applejack ##";
        } else if (trip.equals("!!%coder%!!")) {
            trip = "##  ##";
        }
    }
    post.trip = trip;

    post.icons = parseIcons(getStringSafe(source, "icon", ""));
    post.op = getIntSafe(source, "op", 0) == 1;
    post.sage = post.email.toLowerCase(Locale.US).contains("sage") || post.name.contains("ID:\u00A0Heaven");
    post.timestamp = source.getLong("timestamp") * 1000;

    // A parent of "0" means the post's own number is used as its thread.
    String parent = getStringSafe(source, "parent", post.number);
    post.parentThread = parent.equals("0") ? post.number : parent;

    if (!source.has("files")) {
        post.attachments = null;
    } else {
        JSONArray files = source.getJSONArray("files");
        AttachmentModel[] attachments = new AttachmentModel[files.length()];
        post.attachments = attachments;
        for (int idx = 0; idx < files.length(); ++idx) {
            attachments[idx] = mapAttachmentModel(files.getJSONObject(idx), boardName);
        }
    }

    // A notice is appended to the comment for the "banned" values 1 and 2.
    // NOTE(review): the notice texts look mis-encoded ('?' placeholders) — confirm against upstream.
    int banned = getIntSafe(source, "banned", 0);
    if (banned == 1) {
        post.comment += "<br/><em><font color=\"red\">(? ? ?  . ?.)</font></em>";
    } else if (banned == 2) {
        post.comment += "<br/><em><font color=\"red\">(? ? ?  .)</font></em>";
    }

    return post;
}

From source file:com.nttec.everychan.chans.krautchan.KrautCatalogReader.java

/**
 * Consumes the input belonging to the filter with the given index and stores the extracted data
 * in the first post of {@code currentThread}.
 *
 * @param filterIndex one of the {@code FILTER_*} constants; other values are ignored
 * @throws IOException declared by the underlying read/skip helpers
 */
private void handleFilter(int filterIndex) throws IOException {
    switch (filterIndex) {
    case FILTER_THREAD_NUMBER: {
        currentThread.posts[0].number = readUntilSequence(FILTERS_CLOSE[filterIndex]);
        break;
    }
    case FILTER_THREAD_TITLE: {
        final String header = readUntilSequence(FILTERS_CLOSE[filterIndex]);

        // Look for a country icon, first in /images/balls/, then in /images/warballs/.
        int ballPos = header.indexOf("<img class=\"post_country\" src=\"/images/balls/");
        if (ballPos < 0) {
            ballPos = header.indexOf("<img class=\"post_country\" src=\"/images/warballs/");
        }
        if (ballPos >= 0) {
            // 31 == length of `<img class="post_country" src="`, i.e. the offset of the src value.
            final int srcStart = ballPos + 31;
            final int srcEnd = header.indexOf('"', srcStart);
            if (srcEnd >= 0) {
                BadgeIconModel countryIcon = new BadgeIconModel();
                countryIcon.source = header.substring(srcStart, srcEnd);
                currentThread.posts[0].icons = new BadgeIconModel[] { countryIcon };
            }
        }

        final String withoutTags = RegexUtils.removeHtmlTags(header);
        currentThread.posts[0].subject = StringEscapeUtils.unescapeHtml4(withoutTags).trim();
        break;
    }
    case FILTER_THUMBNAIL: {
        AttachmentModel thumb = new AttachmentModel();
        thumb.type = AttachmentModel.TYPE_IMAGE_STATIC;
        thumb.size = -1;
        thumb.width = -1;
        thumb.height = -1;
        thumb.thumbnail = "/thumbnails/" + readUntilSequence(FILTERS_CLOSE[filterIndex]);
        // The full-size path is derived from the thumbnail path.
        thumb.path = thumb.thumbnail.replace("/thumbnails/", "/files/");
        currentThread.posts[0].attachments = new AttachmentModel[] { thumb };
        break;
    }
    case FILTER_OMITTED: {
        parseOmittedString(readUntilSequence(FILTERS_CLOSE[filterIndex]));
        break;
    }
    case FILTER_POST: {
        skipUntilSequence(SECTION_OPEN);
        currentThread.posts[0].comment = readUntilSequence(SECTION_CLOSE);
        break;
    }
    case FILTER_THREAD_END: {
        finalizeThread();
        break;
    }
    }
}

From source file:com.github.hronom.scrape.dat.website.controllers.ScrapeButtonController.java

/**
 * Loads the page at the URL entered in the view, applies the entered selector to it and shows
 * the HTML of every selected element in the view's output.
 *
 * <p>The input controls are disabled while the work runs and are re-enabled in a {@code finally}
 * block, so the view does not stay locked if processing fails. Exceptions are logged with their
 * stack trace and not rethrown.
 */
public void processByHtmlUnit() {
    // Disable fields in view.
    scrapeView.setWebsiteUrlTextFieldEnabled(false);
    scrapeView.setSelectorTextFieldEnabled(false);
    scrapeView.setScrapeButtonEnabled(false);
    scrapeView.setWorkInProgress(true);
    scrapeView.setOutput("");

    scrapeView.setProgressBarTaskText("initializing");
    logger.info("Start processing...");
    long beginTime = System.currentTimeMillis();

    try {
        // Output input parameters.
        if (!scrapeView.getWebsiteUrl().isEmpty() && !scrapeView.getSelector().isEmpty()) {
            logger.info("Input parameters: \"" + scrapeView.getWebsiteUrl() + "\", \""
                    + scrapeView.getSelector() + "\"");
        }

        // Process.
        URL url = new URL(scrapeView.getWebsiteUrl());
        scrapeView.setProgressBarTaskText("requesting page");
        logger.info("Requesting page...");
        HtmlPage page = webClient.getPage(url);
        logger.info("Requesting of page completed.");

        scrapeView.setProgressBarTaskText("viewing page as XML");
        logger.info("View page as XML");
        String xml = page.asXml();

        // Unescape html.
        scrapeView.setProgressBarTaskText("unescaping HTML");
        logger.info("Unescape html");
        xml = StringEscapeUtils.unescapeHtml4(xml);

        logger.info("Get selector");
        String selector = scrapeView.getSelector();
        if (!xml.isEmpty() && !selector.isEmpty()) {
            scrapeView.setProgressBarTaskText("parsing HTML");
            logger.info("Parse HTML");
            Document doc = Jsoup.parse(xml);

            scrapeView.setProgressBarTaskText("selecting elements in HTML");
            logger.info("select elements in HTML");
            Elements selectedElements = doc.select(selector);

            if (!selectedElements.isEmpty()) {
                scrapeView.setProgressBarTaskText("parsing selected elements");
                logger.info("Parse extracted elements");
                StringBuilder sb = new StringBuilder();
                for (Element element : selectedElements) {
                    // Each element's HTML is followed by a blank line.
                    sb.append(element.html());
                    sb.append("\n");
                    sb.append("\n");
                }
                scrapeView.setOutput(sb.toString());
            }
        }
    } catch (Exception e) {
        // Pass the throwable separately so the stack trace is logged.
        logger.error("Processing failed", e);
    } finally {
        try {
            webClient.close();
        } finally {
            long endTime = System.currentTimeMillis();
            logger.info("Process time: " + (endTime - beginTime) + " ms.");
            logger.info("Processing complete.");

            // Enable fields in view.
            scrapeView.setWorkInProgress(false);
            scrapeView.setScrapeButtonEnabled(true);
            scrapeView.setSelectorTextFieldEnabled(true);
            scrapeView.setWebsiteUrlTextFieldEnabled(true);
        }
    }
}

From source file:de.dplatz.padersprinter.control.TripService.java

/**
 * Evaluates an XPath expression against a node as a string and unescapes HTML entities in the result.
 *
 * @param node  context node for the evaluation
 * @param expr  XPath expression to evaluate
 * @param xpath evaluator to use
 * @return the unescaped string value of the expression
 * @throws XPathExpressionException if the expression cannot be evaluated
 */
static String parseStringNode(Node node, String expr, XPath xpath) throws XPathExpressionException {
    final Object evaluated = xpath.evaluate(expr, node, XPathConstants.STRING);
    return StringEscapeUtils.unescapeHtml4((String) evaluated);
}

From source file:mServer.crawler.sender.MediathekBr.java

/**
 * Downloads the BR programme index page and fills {@code listeThemen} with one
 * {@code {url, topic}} entry per programme link found after the {@code <ul class="clearFix">} marker.
 *
 * <p>Every topic name seen is also added to {@code listeAlleThemen} (once). Links whose path does
 * not start with {@code sendungen/} or {@code video/} are skipped. Scanning stops early when
 * {@code Config.getStop()} is set. Errors on a single entry are logged and scanning continues.
 */
private void getTheman() {
    final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html";
    final String MUSTER_URL = "<a href=\"/mediathek/video/";
    final String MUSTER_URL_1 = "sendungen/";
    final String MUSTER_URL_2 = "video/";
    listeThemen.clear();
    MSStringBuilder seite = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
    GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
    seite = getUrlIo.getUri(SENDERNAME, ADRESSE, StandardCharsets.UTF_8, 5 /* attempts */, seite, "");

    int pos1 = seite.indexOf("<ul class=\"clearFix\">");
    if (pos1 == -1) {
        return;
    }

    while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) {
        if (Config.getStop()) {
            break;
        }

        try {
            pos1 += MUSTER_URL.length();

            // Start from an empty URL for every link so that an entry without a closing quote
            // is skipped instead of silently reusing the previous entry's URL.
            String url = "";
            int pos2 = seite.indexOf("\"", pos1);
            if (pos2 != -1) {
                url = seite.substring(pos1, pos2);
            }

            String thema = seite.extract("<span>", "<", pos1);
            thema = StringEscapeUtils.unescapeXml(thema.trim());
            thema = StringEscapeUtils.unescapeHtml4(thema.trim());
            if (!listeAlleThemen.contains(thema)) {
                listeAlleThemen.add(thema);
            }

            if (url.isEmpty() || (!url.startsWith(MUSTER_URL_1) && !url.startsWith(MUSTER_URL_2))) {
                continue;
            }

            // Add to the list.
            String[] add = new String[] { "http://www.br.de/mediathek/video/" + url, thema };
            listeThemen.addUrl(add);
        } catch (Exception ex) {
            Log.errorLog(821213698, ex);
        }
    }
}

From source file:com.nttec.everychan.chans.cirno.CirnoCatalogReader.java

/**
 * Consumes the input belonging to the filter with the given index and stores the extracted data
 * in the first post of {@code currentThread}.
 *
 * @param filterIndex one of the {@code FILTER_*} constants; other values are ignored
 * @throws IOException declared by the underlying read helper
 */
private void handleFilter(int filterIndex) throws IOException {
    switch (filterIndex) {
    case FILTER_START:
        String start = readUntilSequence(FILTERS_CLOSE[filterIndex]).trim();
        Matcher matcher = PATTERN_START.matcher(start);
        if (matcher.matches()) {
            currentThread.posts[0].number = matcher.group(1);
            try {
                currentThread.posts[0].timestamp = DateFormats.IICHAN_DATE_FORMAT.parse(matcher.group(2))
                        .getTime();
            } catch (Exception ignored) {
                // Best effort: if the date cannot be parsed the timestamp is simply left unset.
            }
        } else {
            // Fallback: take the leading run of ASCII digits as the post number.
            // The scan is bounded by the string length so that an empty or all-digit
            // string cannot run past the end.
            StringBuilder number = new StringBuilder();
            for (int i = 0; i < start.length(); i++) {
                char ch = start.charAt(i);
                if (ch < '0' || ch > '9') {
                    break;
                }
                number.append(ch);
            }
            if (number.length() > 0) {
                currentThread.posts[0].number = number.toString();
            }
        }
        break;
    case FILTER_THUMBNAIL:
        AttachmentModel attachment = new AttachmentModel();
        attachment.type = AttachmentModel.TYPE_IMAGE_STATIC;
        attachment.size = -1;
        attachment.width = -1;
        attachment.height = -1;
        attachment.thumbnail = readUntilSequence(FILTERS_CLOSE[filterIndex]);
        // Derive the full-size path: /thumb/ -> /src/ and drop the "s" suffix after the digits.
        attachment.path = attachment.thumbnail.replace("/thumb/", "/src/").replaceAll("(\\d+)s\\.", "$1.");
        currentThread.posts[0].attachments = new AttachmentModel[] { attachment };
        break;
    case FILTER_SUBJECT:
        currentThread.posts[0].subject = StringEscapeUtils
                .unescapeHtml4(readUntilSequence(FILTERS_CLOSE[filterIndex])).trim();
        break;
    case FILTER_COMMENT:
        currentThread.posts[0].comment = readUntilSequence(FILTERS_CLOSE[filterIndex]);
        break;
    case FILTER_END:
        finalizeThread();
        break;
    }
}

From source file:com.seleniumtests.connectors.mails.ImapClient.java

/**
 * get list of all emails in folder/*w w w  .  ja v a  2s  . com*/
 * 
 * @param folderName      folder to read
 * @param firstMessageTime   date from which we should get messages
 * @param firstMessageIndex index of the firste message to find
 * @throws MessagingException
 * @throws IOException
 */
@Override
public List<Email> getEmails(String folderName, int firstMessageIndex, LocalDateTime firstMessageTime)
        throws MessagingException, IOException {

    if (folderName == null) {
        throw new MessagingException("folder ne doit pas tre vide");
    }

    // Get folder
    Folder folder = store.getFolder(folderName);
    folder.open(Folder.READ_ONLY);

    // Get directory
    Message[] messages = folder.getMessages();

    List<Message> preFilteredMessages = new ArrayList<>();

    final LocalDateTime firstTime = firstMessageTime;

    // on filtre les message en fonction du mode de recherche
    if (searchMode == SearchMode.BY_INDEX || firstTime == null) {
        for (int i = firstMessageIndex, n = messages.length; i < n; i++) {
            preFilteredMessages.add(messages[i]);
        }
    } else {
        preFilteredMessages = Arrays.asList(folder.search(new SearchTerm() {
            private static final long serialVersionUID = 1L;

            @Override
            public boolean match(Message msg) {
                try {
                    return !msg.getReceivedDate()
                            .before(Date.from(firstTime.atZone(ZoneId.systemDefault()).toInstant()));
                } catch (MessagingException e) {
                    return false;
                }
            }
        }));

    }

    List<Email> filteredEmails = new ArrayList<>();
    lastMessageIndex = messages.length;

    for (Message message : preFilteredMessages) {

        String contentType = "";
        try {
            contentType = message.getContentType();
        } catch (MessagingException e) {
            MimeMessage msg = (MimeMessage) message;
            message = new MimeMessage(msg);
            contentType = message.getContentType();
        }

        // decode content
        String messageContent = "";
        List<String> attachments = new ArrayList<>();

        if (contentType.toLowerCase().contains("text/html")) {
            messageContent += StringEscapeUtils.unescapeHtml4(message.getContent().toString());
        } else if (contentType.toLowerCase().contains("multipart/")) {
            List<BodyPart> partList = getMessageParts((Multipart) message.getContent());

            // store content in list
            for (BodyPart part : partList) {

                String partContentType = part.getContentType().toLowerCase();
                if (partContentType.contains("text/html")) {
                    messageContent = messageContent
                            .concat(StringEscapeUtils.unescapeHtml4(part.getContent().toString()));

                } else if (partContentType.contains("text/") && !partContentType.contains("vcard")) {
                    messageContent = messageContent.concat((String) part.getContent().toString());

                } else if (partContentType.contains("image") || partContentType.contains("application/")
                        || partContentType.contains("text/x-vcard")) {
                    if (part.getFileName() != null) {
                        attachments.add(part.getFileName());
                    } else {
                        attachments.add(part.getDescription());
                    }
                } else {
                    logger.debug("type: " + part.getContentType());
                }
            }
        }

        // create a new email
        filteredEmails.add(new Email(message.getSubject(), messageContent, "",
                message.getReceivedDate().toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(),
                attachments));
    }

    folder.close(false);

    return filteredEmails;
}