Example usage for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

List of usage examples for org.apache.commons.lang3 StringEscapeUtils unescapeHtml4

Introduction

On this page you can find example usages of org.apache.commons.lang3 StringEscapeUtils unescapeHtml4.

Prototype

public static final String unescapeHtml4(final String input) 

Source Link

Document

Unescapes a string containing entity escapes to a string containing the actual Unicode characters corresponding to the escapes.

Usage

From source file:cx.fbn.nevernote.sql.REnSearch.java

/**
 * Checks whether a note satisfies every content-based search criterion held by this search:
 * the search phrases, the to-do filters and the resource (MIME type) filters.
 * <p>
 * The note is re-read through the note table before its content is inspected. Phrases are
 * compared against the lower-cased note content with markup removed and HTML entities unescaped.
 *
 * @param n the note to test; only its GUID is used to reload the note
 * @return {@code true} if no content criteria exist or all of them are satisfied,
 *         {@code false} as soon as one criterion fails
 */
private boolean matchContentAll(Note n) {
    if (todo.size() == 0 && resource.size() == 0 && searchPhrases.size() == 0)
        return true;

    n = conn.getNoteTable().getNote(n.getGuid(), true, true, false, false, false);
    final String content = n.getContent();

    // Check for search phrases
    String text = StringEscapeUtils.unescapeHtml4(content.replaceAll("\\<.*?\\>", "")).toLowerCase();
    for (int i = 0; i < searchPhrases.size(); i++) {
        String phrase = searchPhrases.get(i);
        boolean negative = phrase.startsWith("-");
        if (negative)
            phrase = phrase.substring(1);
        // Drop the first and last character of the phrase
        // NOTE(review): presumably the surrounding quotes added by the query parser - confirm
        phrase = phrase.substring(1);
        phrase = phrase.substring(0, phrase.length() - 1);
        boolean found = text.indexOf(phrase) >= 0;
        // A positive phrase must be present, a negated phrase must be absent
        if (found == negative)
            return false;
    }

    // Check to-do filters
    for (int i = 0; i < todo.size(); i++) {
        String value = todo.get(i).replace("\"", "");
        if (!value.endsWith(":false") && !value.endsWith(":true") && !value.endsWith(":*")
                && !value.endsWith("*"))
            return false;

        boolean excluded = value.startsWith("-");
        boolean wildcard = value.endsWith("*") || value.endsWith(":");
        boolean desiredState = !value.endsWith(":false");
        if (excluded)
            desiredState = !desiredState;

        int pos = content.indexOf("<en-todo");
        if (excluded && wildcard) {
            // A negated wildcard asks for notes without any to-do. Previously this case could
            // never match: it failed both when a to-do existed and when none existed.
            if (pos > -1)
                return false;
            continue;
        }
        if (pos == -1 && !excluded)
            return false;

        boolean returnTodo = false;
        while (pos > -1) {
            int endPos = content.indexOf(">", pos);
            if (endPos == -1)
                endPos = content.length(); // unterminated tag: inspect the rest instead of throwing
            String segment = content.substring(pos, endPos);
            boolean currentState = segment.toLowerCase().indexOf("checked=\"true\"") != -1;
            if (desiredState == currentState || wildcard)
                returnTodo = true;

            pos = content.indexOf("<en-todo", pos + 1);
        }
        if (!returnTodo)
            return false;
    }

    // Check resources
    // NOTE(review): a positive filter fails as soon as one resource does not match, and a note
    // without resources passes every resource filter - confirm this is intended
    for (int i = 0; i < resource.size(); i++) {
        String resourceString = resource.get(i).replace("\"", "");
        boolean negative = resourceString.startsWith("-");
        resourceString = resourceString.substring(resourceString.indexOf(":") + 1);
        if (resourceString.equals(""))
            return false;
        for (int j = 0; j < n.getResourcesSize(); j++) {
            boolean match = stringMatch(n.getResources().get(j).getMime(), resourceString, negative);
            if (match == negative)
                return false;
        }
    }

    return true;
}

From source file:com.nttec.everychan.chans.krautchan.KrautModule.java

/**
 * Sends a delete request for a single post and interprets the server's answer.
 * <p>
 * A 302 response is treated as success unless its Location header contains "banned". A 200
 * response is treated as success when the page contains no {@code message_notice} element;
 * otherwise the text following that element is reported as the error.
 *
 * @param model    post number, password and board name of the post to delete
 * @param listener not used by this implementation
 * @param task     passed on to the HTTP streamer
 * @return always {@code null} when the deletion is considered successful
 * @throws Exception with the site's message when deletion is refused, or
 *         {@code HttpWrongStatusCodeException} for any other outcome
 */
@Override
public String deletePost(DeletePostModel model, ProgressListener listener, CancellableTask task)
        throws Exception {
    final String scheme = useHttps() ? "https://" : "http://";
    final String deleteUrl = scheme + CHAN_DOMAIN + "/delete";

    List<NameValuePair> formFields = new ArrayList<NameValuePair>();
    formFields.add(new BasicNameValuePair("post_" + model.postNumber, "delete"));
    formFields.add(new BasicNameValuePair("password", model.password));
    formFields.add(new BasicNameValuePair("board", model.boardName));

    HttpRequestModel request = HttpRequestModel.builder()
            .setPOST(new UrlEncodedFormEntity(formFields, "UTF-8"))
            .setNoRedirect(true)
            .build();

    HttpResponseModel response = null;
    try {
        response = HttpStreamer.getInstance().getFromUrl(deleteUrl, request, httpClient, null, task);

        if (response.statusCode == 302) {
            // Only the first Location header is examined
            for (Header header : response.headers) {
                if (header == null || !HttpHeaders.LOCATION.equalsIgnoreCase(header.getName()))
                    continue;
                String redirectTarget = header.getValue();
                if (redirectTarget.contains("banned"))
                    throw new Exception("You are banned");
                break;
            }
            return null;
        }

        if (response.statusCode == 200) {
            ByteArrayOutputStream body = new ByteArrayOutputStream(1024);
            IOUtils.copyStream(response.stream, body);
            String html = body.toString("UTF-8");

            int noticeStart = html.indexOf("class=\"message_notice");
            if (noticeStart == -1)
                return null;

            int tagEnd = html.indexOf('>', noticeStart);
            if (tagEnd != -1) {
                // Take everything after the notice's opening tag, cut at the end of the table row
                String rawNotice = html.substring(tagEnd + 1);
                int rowEnd = rawNotice.indexOf("</tr>");
                if (rowEnd != -1)
                    rawNotice = rawNotice.substring(0, rowEnd);
                String plainNotice = StringEscapeUtils.unescapeHtml4(RegexUtils.removeHtmlTags(rawNotice))
                        .trim();
                throw new Exception(RegexUtils.trimToSpace(plainNotice));
            }
        }

        throw new HttpWrongStatusCodeException(response.statusCode,
                response.statusCode + " - " + response.statusReason);
    } finally {
        if (response != null)
            response.release();
    }
}

From source file:edu.illinois.cs.cogcomp.ner.Main.java

/**
 * Runs the NER annotator over a single input file and publishes the result.
 * <p>
 * The file's text is read, HTML entities are unescaped, a text annotation is built from it and
 * the annotator's view is rendered by {@code produceOutput}. The rendered string is handed to
 * the result processor together with the input file's name.
 *
 * @param infile the file to process
 * @throws Exception if reading, annotating or publishing fails
 */
private void processInputFile(File infile) throws Exception {
    final String rawText = InFile.readFileText(infile.toString());
    final String unescapedText = StringEscapeUtils.unescapeHtml4(rawText);
    final TextAnnotation annotation = tab.createTextAnnotation(unescapedText);
    final String rendered = this.produceOutput(this.nerAnnotator.getView(annotation), annotation);
    this.getResultProcessor().publish(rendered, infile.getName());
}

From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java

/**
 * Parses a Wikipedia XML dump and emits one XHTML document per accepted page to {@code handler}.
 * <p>
 * For every {@code <page>} the title, timestamp, username and text are collected into
 * {@code metadata}; the wiki markup is rendered to plain text and HTML entities are unescaped.
 * Redirect pages and pages in special namespaces (category, template, help, ...) are skipped.
 * Which extra information is extracted is controlled by the {@link WikipediaDumpParserConfig}
 * found in {@code context}; a default configuration is used when none is present.
 * <p>
 * Any exception raised while parsing is logged at SEVERE level and not propagated. The streams
 * opened on the dump file and the XML event reader are closed before the method returns.
 *
 * @param stream   the dump; it is accessed as a file through {@code TikaInputStream}
 * @param handler  receives the generated XHTML events
 * @param metadata reused as per-page metadata; it is cleared at the start of every page
 * @param context  may carry a {@link WikipediaDumpParserConfig}
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    // Declared outside the try block so that they can be released in the finally block
    FileInputStream dumpInputStream = null;
    XMLEventReader xmlEventReader = null;

    try {

        // We iterate over the page entries. Each contains title, timestamp, <contributor> => <username> and text.
        // The text still has to be cleaned: it is pre-processed with bliki, but unfortunately we have to prepare
        // the string beforehand and clean it up afterwards ourselves.

        WikipediaDumpParserConfig wikipediaDumpParserConfig = context.get(WikipediaDumpParserConfig.class);

        if (wikipediaDumpParserConfig == null) {
            Logger.getLogger(WikipediaDumpParser.class.getName())
                    .info("No wikipedia parser config found. Will take the default one.");
            wikipediaDumpParserConfig = new WikipediaDumpParserConfig();
        }

        TikaInputStream tikaStream = TikaInputStream.get(stream);

        File fWikipediaDumpFile4Stream = tikaStream.getFile();

        MultiValueHashMap<String, String> hsPageTitle2Redirects = new MultiValueHashMap<String, String>();
        if (wikipediaDumpParserConfig.determinePageRedirects) {
            // Separate pass over the dump; close the stream once the redirects are collected
            FileInputStream redirectInputStream = new FileInputStream(fWikipediaDumpFile4Stream);
            try {
                hsPageTitle2Redirects = getPageTitle2Redirects(redirectInputStream);
            } finally {
                redirectInputStream.close();
            }
        }

        HashSet<String> hsRedirectPageTitles = new HashSet<String>(hsPageTitle2Redirects.values());

        String strCleanedText = "";
        String strBaseURL = null;

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        dumpInputStream = new FileInputStream(fWikipediaDumpFile4Stream);
        xmlEventReader = xmlInputFactory.createXMLEventReader(dumpInputStream, "Utf-8");
        while (xmlEventReader.hasNext()) {

            XMLEvent xmlEvent = xmlEventReader.nextEvent();

            if (xmlEvent.isEndElement() && xmlEvent.asEndElement().getName().getLocalPart().equals("page")) {
                // Empty metadata means the page was skipped or carried no usable data
                if (metadata.size() == 0)
                    continue;

                // we also want the MIME type in the metadata
                metadata.add(Metadata.CONTENT_TYPE, "application/wikipedia+xml");

                XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
                xhtml.startDocument();

                xhtml.startElement("p");
                xhtml.characters(strCleanedText.toCharArray(), 0, strCleanedText.length());
                xhtml.endElement("p");

                xhtml.endDocument();

            }

            if (!xmlEvent.isStartElement())
                continue;

            // ##### the siteinfo

            if (strBaseURL == null && xmlEvent.asStartElement().getName().getLocalPart().equals("base")) {
                // http://de.wikipedia.org/wiki/Wikipedia:Hauptseite => http://de.wikipedia.org/wiki/
                strBaseURL = readNextCharEventsText(xmlEventReader);
                strBaseURL = strBaseURL.substring(0, strBaseURL.lastIndexOf("/") + 1);
            }

            // ##### the page

            if (xmlEvent.asStartElement().getName().getLocalPart().equals("page")) {
                for (String strKey : metadata.names())
                    metadata.remove(strKey);
            }

            // ##### the title

            if (xmlEvent.asStartElement().getName().getLocalPart().equals("title")) {
                // we always remember the current title
                String strCurrentTitle = readNextCharEventsText(xmlEventReader);

                // If the title belongs to a redirect page we consume the whole page from the event queue up to
                // its end element and thereby ignore it. Special Wikipedia pages are ignored as well.
                String strSmallTitle = strCurrentTitle.trim().toLowerCase();
                if (hsRedirectPageTitles.contains(strCurrentTitle)
                        || hsRedirectPageTitles.contains(strSmallTitle)
                        || hsRedirectPageTitles.contains(strCurrentTitle.trim())
                        || strSmallTitle.startsWith("category:") || strSmallTitle.startsWith("kategorie:")
                        || strSmallTitle.startsWith("vorlage:") || strSmallTitle.startsWith("template:")
                        || strSmallTitle.startsWith("hilfe:") || strSmallTitle.startsWith("help:")
                        || strSmallTitle.startsWith("wikipedia:") || strSmallTitle.startsWith("portal:")
                        || strSmallTitle.startsWith("mediawiki:")) {

                    // Stop at the end of the input instead of relying on an exception
                    while (xmlEventReader.hasNext()) {
                        XMLEvent nextXmlEvent = xmlEventReader.nextEvent();
                        if (nextXmlEvent.isEndElement()
                                && nextXmlEvent.asEndElement().getName().getLocalPart().equals("page"))
                            break;
                    }
                } else {
                    metadata.add(Metadata.TITLE, strCurrentTitle);
                    metadata.add(Metadata.SOURCE, strBaseURL + strCurrentTitle);

                    for (String strRedirect : hsPageTitle2Redirects.get(strCurrentTitle)) {
                        // we ignore titles that differ only in upper/lower case
                        if (!StringUtils.containsIgnoreCase(strRedirect, metadata.getValues(Metadata.TITLE)))
                            metadata.add(Metadata.TITLE, strRedirect);
                    }
                }

                continue;
            }

            // ##### the text
            if (xmlEvent.asStartElement().getName().getLocalPart().equals("text")) {
                String strText = readNextCharEventsText(xmlEventReader);

                if (wikipediaDumpParserConfig.parseLinksAndCategories)
                    parseLinksAndCategories(strText, strBaseURL, metadata, handler);
                if (wikipediaDumpParserConfig.parseInfoBoxes)
                    parseInfoBox(strText, metadata, handler);
                if (wikipediaDumpParserConfig.parseGeoCoordinates)
                    parseGeoCoordinates(strText, metadata);

                // because of some shortcomings of the cleaner we unfortunately have to pre- and post-process here
                strText = strText.replaceAll("==\n", "==\n\n");
                strText = strText.replaceAll("\n==", "\n\n==");

                strCleanedText = m_wikiModel.render(new PlainTextConverter(), strText);

                strCleanedText = strCleanedText.replaceAll("\\{\\{", " ");
                strCleanedText = strCleanedText.replaceAll("\\}\\}", " ");

                strCleanedText = StringEscapeUtils.unescapeHtml4(strCleanedText);

                continue;
            }

            // ##### the timestamp
            if (xmlEvent.asStartElement().getName().getLocalPart().equals("timestamp")) {
                String strTimestamp = readNextCharEventsText(xmlEventReader);

                metadata.add(Metadata.MODIFIED, strTimestamp);

                continue;
            }

            // ##### the username
            if (xmlEvent.asStartElement().getName().getLocalPart().equals("username")) {
                String strUsername = readNextCharEventsText(xmlEventReader);

                metadata.add(Metadata.CREATOR, strUsername);

                continue;
            }

        }

    } catch (Exception e) {
        Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.SEVERE, "Error", e);
    } finally {
        // The reader and the file stream are closed separately
        if (xmlEventReader != null) {
            try {
                xmlEventReader.close();
            } catch (javax.xml.stream.XMLStreamException e) {
                Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.WARNING,
                        "Could not close the XML event reader", e);
            }
        }
        if (dumpInputStream != null) {
            try {
                dumpInputStream.close();
            } catch (IOException e) {
                Logger.getLogger(WikipediaDumpParser.class.getName()).log(Level.WARNING,
                        "Could not close the wikipedia dump stream", e);
            }
        }
    }

}

From source file:com.wegas.core.Helper.java

/**
 * Case-insensitive "contains" check on HTML-escaped text.
 * <p>
 * HTML entities in {@code text} are unescaped first, then {@code criteria} is searched for as a
 * literal string (regex metacharacters in it have no special meaning). Case folding is
 * Unicode-aware, so accented characters produced by unescaping are matched regardless of case.
 *
 * @param text     text to search in, may be {@code null}
 * @param criteria literal text to search for, may be {@code null}
 * @return {@code true} if the unescaped text contains the criteria ignoring case;
 *         {@code false} otherwise or when either argument is {@code null}
 */
public static Boolean insensitiveContains(String text, String criteria) {
    // Pattern.quote(null) would throw; treat a missing criteria like a missing text
    if (text == null || criteria == null) {
        return false;
    }
    return Pattern.compile(Pattern.quote(criteria), Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE)
            .matcher(StringEscapeUtils.unescapeHtml4(text)).find();
}

From source file:cgeo.geocaching.connector.gc.GCParser.java

/**
 * Parses the HTML of a cache details page into a {@link Geocache} and returns it wrapped in a
 * {@link SearchResult}.
 * <p>
 * Fields are extracted with the patterns in {@code GCConstants}. Failures while parsing
 * individual optional sections (attributes, spoilers, inventory, log counts, dates,
 * coordinates) are logged and do not abort the parse.
 *
 * @param pageIn  the page HTML
 * @param handler receives progress updates and is polled for cancellation
 * @return a result containing the parsed cache; a result carrying an error status for
 *         unpublished or premium-only caches, an unknown-error page or a missing geocode;
 *         or {@code null} if the page is blank, the cache details section is not found,
 *         or the handler was cancelled
 */
static SearchResult parseCacheFromText(final String pageIn, final CancellableHandler handler) {
    CancellableHandler.sendLoadProgressDetail(handler, R.string.cache_dialog_loading_details_status_details);

    if (StringUtils.isBlank(pageIn)) {
        Log.e("GCParser.parseCache: No page given");
        return null;
    }

    final SearchResult searchResult = new SearchResult();

    if (pageIn.contains(GCConstants.STRING_UNPUBLISHED_OTHER)
            || pageIn.contains(GCConstants.STRING_UNPUBLISHED_FROM_SEARCH)) {
        searchResult.setError(StatusCode.UNPUBLISHED_CACHE);
        return searchResult;
    }

    if (pageIn.contains(GCConstants.STRING_PREMIUMONLY_1)
            || pageIn.contains(GCConstants.STRING_PREMIUMONLY_2)) {
        searchResult.setError(StatusCode.PREMIUM_ONLY);
        return searchResult;
    }

    final String cacheName = Html.fromHtml(TextUtils.getMatch(pageIn, GCConstants.PATTERN_NAME, true, ""))
            .toString();
    if (GCConstants.STRING_UNKNOWN_ERROR.equalsIgnoreCase(cacheName)) {
        searchResult.setError(StatusCode.UNKNOWN_ERROR);
        return searchResult;
    }

    // first handle the content with line breaks, then trim everything for easier matching and reduced memory consumption in parsed fields
    String personalNoteWithLineBreaks = "";
    MatcherWrapper matcher = new MatcherWrapper(GCConstants.PATTERN_PERSONALNOTE, pageIn);
    if (matcher.find()) {
        personalNoteWithLineBreaks = matcher.group(1).trim();
    }

    final String page = TextUtils.replaceWhitespace(pageIn);

    final Geocache cache = new Geocache();
    cache.setDisabled(page.contains(GCConstants.STRING_DISABLED));

    cache.setArchived(page.contains(GCConstants.STRING_ARCHIVED));

    cache.setPremiumMembersOnly(TextUtils.matches(page, GCConstants.PATTERN_PREMIUMMEMBERS));

    cache.setFavorite(TextUtils.matches(page, GCConstants.PATTERN_FAVORITE));

    // cache geocode
    cache.setGeocode(TextUtils.getMatch(page, GCConstants.PATTERN_GEOCODE, true, cache.getGeocode()));

    // cache id
    cache.setCacheId(TextUtils.getMatch(page, GCConstants.PATTERN_CACHEID, true, cache.getCacheId()));

    // cache guid
    cache.setGuid(TextUtils.getMatch(page, GCConstants.PATTERN_GUID, true, cache.getGuid()));

    // name
    cache.setName(cacheName);

    // owner real name
    cache.setOwnerUserId(Network
            .decode(TextUtils.getMatch(page, GCConstants.PATTERN_OWNER_USERID, true, cache.getOwnerUserId())));

    cache.setUserModifiedCoords(false);

    String tableInside = page;

    final int pos = tableInside.indexOf(GCConstants.STRING_CACHEDETAILS);
    if (pos == -1) {
        Log.e("GCParser.parseCache: ID \"cacheDetails\" not found on page");
        return null;
    }

    tableInside = tableInside.substring(pos);

    if (StringUtils.isNotBlank(tableInside)) {
        // cache terrain
        String result = TextUtils.getMatch(tableInside, GCConstants.PATTERN_TERRAIN, true, null);
        if (result != null) {
            try {
                cache.setTerrain(Float.parseFloat(StringUtils.replaceChars(result, '_', '.')));
            } catch (final NumberFormatException e) {
                Log.e("Error parsing terrain value", e);
            }
        }

        // cache difficulty
        result = TextUtils.getMatch(tableInside, GCConstants.PATTERN_DIFFICULTY, true, null);
        if (result != null) {
            try {
                cache.setDifficulty(Float.parseFloat(StringUtils.replaceChars(result, '_', '.')));
            } catch (final NumberFormatException e) {
                Log.e("Error parsing difficulty value", e);
            }
        }

        // owner
        cache.setOwnerDisplayName(StringEscapeUtils.unescapeHtml4(TextUtils.getMatch(tableInside,
                GCConstants.PATTERN_OWNER_DISPLAYNAME, true, cache.getOwnerDisplayName())));

        // hidden
        try {
            String hiddenString = TextUtils.getMatch(tableInside, GCConstants.PATTERN_HIDDEN, true, null);
            if (StringUtils.isNotBlank(hiddenString)) {
                cache.setHidden(GCLogin.parseGcCustomDate(hiddenString));
            }
            if (cache.getHiddenDate() == null) {
                // event date
                hiddenString = TextUtils.getMatch(tableInside, GCConstants.PATTERN_HIDDENEVENT, true, null);
                if (StringUtils.isNotBlank(hiddenString)) {
                    cache.setHidden(GCLogin.parseGcCustomDate(hiddenString));
                }
            }
        } catch (final ParseException e) {
            // failed to parse cache hidden date
            Log.w("GCParser.parseCache: Failed to parse cache hidden (event) date");
        }

        // favorite
        try {
            cache.setFavoritePoints(Integer
                    .parseInt(TextUtils.getMatch(tableInside, GCConstants.PATTERN_FAVORITECOUNT, true, "0")));
        } catch (final NumberFormatException e) {
            Log.e("Error parsing favorite count", e);
        }

        // cache size
        cache.setSize(CacheSize.getById(
                TextUtils.getMatch(tableInside, GCConstants.PATTERN_SIZE, true, CacheSize.NOT_CHOSEN.id)));
    }

    // cache found
    cache.setFound(TextUtils.matches(page, GCConstants.PATTERN_FOUND)
            || TextUtils.matches(page, GCConstants.PATTERN_FOUND_ALTERNATIVE));

    // cache found date
    try {
        final String foundDateString = TextUtils.getMatch(page, GCConstants.PATTERN_FOUND_DATE, true, null);
        if (StringUtils.isNotBlank(foundDateString)) {
            cache.setVisitedDate(GCLogin.parseGcCustomDate(foundDateString).getTime());
        }
    } catch (final ParseException e) {
        // failed to parse cache found date
        Log.w("GCParser.parseCache: Failed to parse cache found date");
    }

    // cache type
    cache.setType(CacheType
            .getByPattern(TextUtils.getMatch(page, GCConstants.PATTERN_TYPE, true, cache.getType().id)));

    // on watchlist
    cache.setOnWatchlist(TextUtils.matches(page, GCConstants.PATTERN_WATCHLIST));

    // latitude and longitude. Can only be retrieved if user is logged in
    String latlon = TextUtils.getMatch(page, GCConstants.PATTERN_LATLON, true, "");
    if (StringUtils.isNotEmpty(latlon)) {
        try {
            cache.setCoords(new Geopoint(latlon));
            cache.setReliableLatLon(true);
        } catch (final Geopoint.GeopointException e) {
            Log.w("GCParser.parseCache: Failed to parse cache coordinates", e);
        }
    }

    // cache location
    cache.setLocation(TextUtils.getMatch(page, GCConstants.PATTERN_LOCATION, true, ""));

    // cache hint
    final String result = TextUtils.getMatch(page, GCConstants.PATTERN_HINT, false, null);
    if (result != null) {
        // replace linebreak and paragraph tags
        final String hint = GCConstants.PATTERN_LINEBREAK.matcher(result).replaceAll("\n");
        cache.setHint(StringUtils.replace(hint, "</p>", "").trim());
    }

    cache.checkFields();

    // cache personal note
    cache.setPersonalNote(personalNoteWithLineBreaks);

    // cache short description
    cache.setShortDescription(TextUtils.getMatch(page, GCConstants.PATTERN_SHORTDESC, true, ""));

    // cache description
    cache.setDescription(TextUtils.getMatch(page, GCConstants.PATTERN_DESC, true, ""));

    // cache attributes
    try {
        final String attributesPre = TextUtils.getMatch(page, GCConstants.PATTERN_ATTRIBUTES, true, null);
        if (null != attributesPre) {
            final MatcherWrapper matcherAttributesInside = new MatcherWrapper(
                    GCConstants.PATTERN_ATTRIBUTESINSIDE, attributesPre);

            final ArrayList<String> attributes = new ArrayList<String>();
            while (matcherAttributesInside.find()) {
                if (matcherAttributesInside.groupCount() > 1
                        && !matcherAttributesInside.group(2).equalsIgnoreCase("blank")) {
                    // by default, use the tooltip of the attribute
                    String attribute = matcherAttributesInside.group(2).toLowerCase(Locale.US);

                    // if the image name can be recognized, use the image name as attribute
                    final String imageName = matcherAttributesInside.group(1).trim();
                    if (StringUtils.isNotEmpty(imageName)) {
                        final int start = imageName.lastIndexOf('/');
                        final int end = imageName.lastIndexOf('.');
                        if (start >= 0 && end >= 0) {
                            attribute = imageName.substring(start + 1, end).replace('-', '_')
                                    .toLowerCase(Locale.US);
                        }
                    }
                    attributes.add(attribute);
                }
            }
            cache.setAttributes(attributes);
        }
    } catch (final RuntimeException e) {
        // failed to parse cache attributes
        Log.w("GCParser.parseCache: Failed to parse cache attributes");
    }

    // cache spoilers
    try {
        if (CancellableHandler.isCancelled(handler)) {
            return null;
        }
        CancellableHandler.sendLoadProgressDetail(handler,
                R.string.cache_dialog_loading_details_status_spoilers);

        final MatcherWrapper matcherSpoilersInside = new MatcherWrapper(GCConstants.PATTERN_SPOILER_IMAGE,
                page);

        while (matcherSpoilersInside.find()) {
            // the original spoiler URL (include .../display/... contains a low-resolution image
            // if we shorten the URL we get the original-resolution image
            final String url = matcherSpoilersInside.group(1).replace("/display", "");

            String title = null;
            if (matcherSpoilersInside.group(3) != null) {
                title = matcherSpoilersInside.group(3);
            }
            String description = null;
            if (matcherSpoilersInside.group(4) != null) {
                description = matcherSpoilersInside.group(4);
            }
            cache.addSpoiler(new Image(url, title, description));
        }
    } catch (final RuntimeException e) {
        // failed to parse cache spoilers
        Log.w("GCParser.parseCache: Failed to parse cache spoilers");
    }

    // cache inventory
    try {
        cache.setInventoryItems(0);

        final MatcherWrapper matcherInventory = new MatcherWrapper(GCConstants.PATTERN_INVENTORY, page);
        if (matcherInventory.find()) {
            if (cache.getInventory() == null) {
                cache.setInventory(new ArrayList<Trackable>());
            }

            if (matcherInventory.groupCount() > 1) {
                final String inventoryPre = matcherInventory.group(2);

                if (StringUtils.isNotBlank(inventoryPre)) {
                    final MatcherWrapper matcherInventoryInside = new MatcherWrapper(
                            GCConstants.PATTERN_INVENTORYINSIDE, inventoryPre);

                    while (matcherInventoryInside.find()) {
                        if (matcherInventoryInside.groupCount() > 0) {
                            final Trackable inventoryItem = new Trackable();
                            inventoryItem.setGuid(matcherInventoryInside.group(1));
                            inventoryItem.setName(matcherInventoryInside.group(2));

                            cache.getInventory().add(inventoryItem);
                            cache.setInventoryItems(cache.getInventoryItems() + 1);
                        }
                    }
                }
            }
        }
    } catch (final RuntimeException e) {
        // failed to parse cache inventory
        Log.w("GCParser.parseCache: Failed to parse cache inventory (2)");
    }

    // cache logs counts
    try {
        final String countlogs = TextUtils.getMatch(page, GCConstants.PATTERN_COUNTLOGS, true, null);
        if (null != countlogs) {
            final MatcherWrapper matcherLog = new MatcherWrapper(GCConstants.PATTERN_COUNTLOG, countlogs);

            while (matcherLog.find()) {
                final String typeStr = matcherLog.group(1);
                final String countStr = getNumberString(matcherLog.group(2));

                if (StringUtils.isNotBlank(typeStr) && LogType.UNKNOWN != LogType.getByIconName(typeStr)
                        && StringUtils.isNotBlank(countStr)) {
                    cache.getLogCounts().put(LogType.getByIconName(typeStr), Integer.parseInt(countStr));
                }
            }
        }
    } catch (final NumberFormatException e) {
        // failed to parse logs
        Log.w("GCParser.parseCache: Failed to parse cache log count");
    }

    // waypoints - reset collection
    cache.setWaypoints(Collections.<Waypoint>emptyList(), false);

    // add waypoint for original coordinates in case of user-modified listing-coordinates
    try {
        final String originalCoords = TextUtils.getMatch(page, GCConstants.PATTERN_LATLON_ORIG, false, null);

        if (null != originalCoords) {
            final Waypoint waypoint = new Waypoint(
                    CgeoApplication.getInstance().getString(R.string.cache_coordinates_original),
                    WaypointType.ORIGINAL, false);
            waypoint.setCoords(new Geopoint(originalCoords));
            cache.addOrChangeWaypoint(waypoint, false);
            cache.setUserModifiedCoords(true);
        }
    } catch (final Geopoint.GeopointException e) {
        // best effort: the cache is still usable without the original-coordinates waypoint,
        // but leave a trace instead of swallowing the failure silently
        Log.w("GCParser.parseCache: Failed to parse original coordinates", e);
    }

    int wpBegin = page.indexOf("<table class=\"Table\" id=\"ctl00_ContentBody_Waypoints\">");
    if (wpBegin != -1) { // parse waypoints
        if (CancellableHandler.isCancelled(handler)) {
            return null;
        }
        CancellableHandler.sendLoadProgressDetail(handler,
                R.string.cache_dialog_loading_details_status_waypoints);

        String wpList = page.substring(wpBegin);

        int wpEnd = wpList.indexOf("</p>");
        if (wpEnd > -1 && wpEnd <= wpList.length()) {
            wpList = wpList.substring(0, wpEnd);
        }

        if (!wpList.contains("No additional waypoints to display.")) {
            wpEnd = wpList.indexOf("</table>");
            // Only cut at the closing tag when it is present; substring(0, -1) would throw
            if (wpEnd > -1) {
                wpList = wpList.substring(0, wpEnd);
            }

            wpBegin = wpList.indexOf("<tbody>");
            wpEnd = wpList.indexOf("</tbody>");
            if (wpBegin >= 0 && wpEnd >= 0 && wpEnd <= wpList.length()) {
                wpList = wpList.substring(wpBegin + 7, wpEnd);
            }

            final String[] wpItems = wpList.split("<tr");

            // Each waypoint occupies two table rows: the data row and a following note row,
            // hence the additional j++ inside the loop body
            for (int j = 1; j < wpItems.length; j++) {
                String[] wp = wpItems[j].split("<td");

                // waypoint name
                // res is null during the unit tests
                final String name = TextUtils.getMatch(wp[6], GCConstants.PATTERN_WPNAME, true, 1,
                        CgeoApplication.getInstance().getString(R.string.waypoint), true);

                // waypoint type
                final String resulttype = TextUtils.getMatch(wp[3], GCConstants.PATTERN_WPTYPE, null);

                final Waypoint waypoint = new Waypoint(name, WaypointType.findById(resulttype), false);

                // waypoint prefix
                waypoint.setPrefix(TextUtils.getMatch(wp[4], GCConstants.PATTERN_WPPREFIXORLOOKUPORLATLON, true,
                        2, waypoint.getPrefix(), false));

                // waypoint lookup
                waypoint.setLookup(TextUtils.getMatch(wp[5], GCConstants.PATTERN_WPPREFIXORLOOKUPORLATLON, true,
                        2, waypoint.getLookup(), false));

                // waypoint latitude and longitude
                latlon = Html.fromHtml(TextUtils.getMatch(wp[7], GCConstants.PATTERN_WPPREFIXORLOOKUPORLATLON,
                        false, 2, "", false)).toString().trim();
                if (!StringUtils.startsWith(latlon, "???")) {
                    waypoint.setLatlon(latlon);
                    waypoint.setCoords(new Geopoint(latlon));
                }

                j++;
                if (wpItems.length > j) {
                    wp = wpItems[j].split("<td");
                }

                // waypoint note
                waypoint.setNote(TextUtils.getMatch(wp[3], GCConstants.PATTERN_WPNOTE, waypoint.getNote()));

                cache.addOrChangeWaypoint(waypoint, false);
            }
        }
    }

    cache.parseWaypointsFromNote();

    // logs
    cache.setLogs(getLogsFromDetails(page, false));

    // last check for necessary cache conditions
    if (StringUtils.isBlank(cache.getGeocode())) {
        searchResult.setError(StatusCode.UNKNOWN_ERROR);
        return searchResult;
    }

    cache.setDetailedUpdatedNow();
    searchResult.addAndPutInCache(Collections.singletonList(cache));
    return searchResult;
}

From source file:com.rockagen.commons.util.CommUtil.java

/**
 * Replaces HTML 4.0 entity escapes in a string with the characters they stand for, by
 * delegating to {@code StringEscapeUtils.unescapeHtml4(str)}.
 * <p>
 * Example: <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>
 * becomes <code>"bread" &amp; "butter"</code>.
 * </p>
 * <p>
 * All known HTML 4.0 entities are supported, including accented characters. The commonly used
 * apostrophe escape (&amp;apos;) is not a legal HTML 4.0 entity and is therefore not supported.
 * </p>
 *
 * @param str the value to unescape
 * @return {@code str} itself when {@code isBlank(str)} is true, otherwise the unescaped string
 */
public static String unescapeHtml4(String str) {
    return isBlank(str) ? str : StringEscapeUtils.unescapeHtml4(str);
}

From source file:info.icefilms.icestream.browse.Location.java

/**
 * Strips HTML tags from a string and then unescapes its HTML entities.
 *
 * @param string the raw text, possibly containing markup
 * @return the text with every {@code <...>} span removed and entities unescaped
 */
protected static String CleanString(String string) {
    // DOTALL lets '.' match line terminators, so a tag broken across lines is removed too;
    // the reluctant quantifier stops at the first '>' after each '<'.
    final Pattern tagPattern = Pattern.compile("<.*?>", Pattern.DOTALL);
    final String withoutTags = tagPattern.matcher(string).replaceAll("");

    // Entities are unescaped only after the tags are gone.
    return StringEscapeUtils.unescapeHtml4(withoutTags);
}

From source file:de.akra.idocit.java.services.SimpleJavadocParser.java

/**
 * Turns the {@code <br/>} and {@code <tab/>} markers of a text into real
 * whitespace and unescapes the HTML entities of the result.
 *
 * @param text the text to convert
 * @return the text with {@code <br/>} replaced by {@code StringUtils.NEW_LINE},
 *         {@code <tab/>} replaced by a tab character, and entities unescaped
 */
private String formatText(final String text) {
    // Both search arguments are regular expressions, but they contain no metacharacters.
    final String withLineBreaks = text.replaceAll("<br/>", StringUtils.NEW_LINE);
    final String withTabs = withLineBreaks.replaceAll("<tab/>", "\t");
    return StringEscapeUtils.unescapeHtml4(withTabs);
}

From source file:com.nttec.everychan.api.util.WakabaReader.java

/**
 *  ? ?   ,  HTML-?, ? ? &lt;span class="filesize"&gt;.
 * ?   ,     ?? {@link #currentAttachments}
 *  ??  {link {@link ThreadModel#attachmentsCount})  {@link #currentThread}.<br>
 *  ?    ??  /*from  w  w  w  .j  a v a2s  .c om*/
 * (? , ??  ,   ,   ??,  ?, ? ?).
 */
protected void parseAttachment(String html) {
    AttachmentModel attachment = new AttachmentModel();
    attachment.size = -1;

    int startHref, endHref;
    if ((startHref = html.indexOf("href=\"")) != -1 && (endHref = html.indexOf('\"', startHref + 6)) != -1) {
        attachment.path = html.substring(startHref + 6, endHref);
        String pathLower = attachment.path.toLowerCase(Locale.US);
        if (pathLower.endsWith(".jpg") || pathLower.endsWith(".jpeg") || pathLower.endsWith(".png"))
            attachment.type = AttachmentModel.TYPE_IMAGE_STATIC;
        else if (pathLower.endsWith(".gif"))
            attachment.type = AttachmentModel.TYPE_IMAGE_GIF;
        else if (pathLower.endsWith(".svg") || pathLower.endsWith(".svgz"))
            attachment.type = AttachmentModel.TYPE_IMAGE_SVG;
        else if (pathLower.endsWith(".webm") || pathLower.endsWith(".mp4") || pathLower.endsWith(".ogv"))
            attachment.type = AttachmentModel.TYPE_VIDEO;
        else if (pathLower.endsWith(".mp3") || pathLower.endsWith(".ogg"))
            attachment.type = AttachmentModel.TYPE_AUDIO;
        else if (pathLower.startsWith("http") && (pathLower.contains("youtube.")))
            attachment.type = AttachmentModel.TYPE_OTHER_NOTFILE;
        else
            attachment.type = AttachmentModel.TYPE_OTHER_FILE;
    } else {
        return;
    }

    Matcher byteSizeMatcher = ATTACHMENT_SIZE_PATTERN.matcher(html);
    while (byteSizeMatcher.find()) {
        try {
            String digits = byteSizeMatcher.group(1).replace(',', '.');
            int multiplier = 1;
            String prefix = byteSizeMatcher.group(2);
            if (prefix != null) {
                if (prefix.equalsIgnoreCase("") || prefix.equalsIgnoreCase("k"))
                    multiplier = 1024;
                else if (prefix.equalsIgnoreCase("") || prefix.equalsIgnoreCase("m"))
                    multiplier = 1024 * 1024;
            }
            int value = Math.round(Float.parseFloat(digits) / 1024 * multiplier);
            attachment.size = value;

            char nextChar = ' ';
            int index = byteSizeMatcher.end();
            while (index < html.length() && nextChar <= ' ')
                nextChar = html.charAt(index++);
            if (nextChar == ',')
                break;
        } catch (NumberFormatException e) {
        }
    }

    Matcher pxSizeMatcher = ATTACHMENT_PX_SIZE_PATTERN.matcher(html);
    int indexEndPxSize = -1;
    while (pxSizeMatcher.find()) {
        try {
            int width = Integer.parseInt(pxSizeMatcher.group(1));
            int height = Integer.parseInt(pxSizeMatcher.group(2));
            attachment.width = width;
            attachment.height = height;
            indexEndPxSize = pxSizeMatcher.end();

            char nextChar = ' ';
            int index = pxSizeMatcher.end();
            while (index < html.length() && nextChar <= ' ')
                nextChar = html.charAt(index++);
            if (nextChar == ',')
                break;
        } catch (NumberFormatException e) {
        }
    }

    if (indexEndPxSize != -1) {
        Matcher originalNameMatcher = ATTACHMENT_ORIGINAL_NAME_PATTERN.matcher(html);
        if (originalNameMatcher.find(indexEndPxSize)) {
            String originalName = originalNameMatcher.group(1).trim();
            if (originalName != null && originalName.length() > 0) {
                attachment.originalName = StringEscapeUtils.unescapeHtml4(originalName);
            }
        }
    }

    ++currentThread.attachmentsCount;
    currentAttachments.add(attachment);
}