Java examples for java.lang:String Strip
Attempts to remove all unnecessary P tags from html strings
/********************************************************************************** * $URL$/*ww w. j a v a 2 s. c o m*/ * $Id$ *********************************************************************************** * * Copyright (c) 2008 The Sakai Foundation * * Licensed under the Educational Community License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.opensource.org/licenses/ECL-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * **********************************************************************************/ //package com.java2s; public class Main { public static String ENDING_P_SPACE_TAGS = "<p> </p>"; public static String STARTING_P_TAG = "<p>"; public static String ENDING_P_TAG = "</p>"; /** * Attempts to remove all unnecessary P tags from html strings * * @param cleanup an html string to cleanup * @return the cleaned up string */ public static String cleanupHtmlPtags(String cleanup) { if (cleanup == null) { // nulls are ok return null; } else if (cleanup.trim().length() == 0) { // nothing to do return cleanup; } cleanup = cleanup.trim(); if (cleanup.length() > ENDING_P_SPACE_TAGS.length()) { // (remove trailing blank lines) // - While (cleanup ends with "<p> </p>") remove trailing "<p> </p>". while (cleanup.toLowerCase().endsWith(ENDING_P_SPACE_TAGS)) { // chop off the end cleanup = cleanup.substring(0, cleanup.length() - ENDING_P_SPACE_TAGS.length()) .trim(); } } if (cleanup.length() > (STARTING_P_TAG.length() + ENDING_P_TAG .length())) { // (remove a single set of <p> tags) // if cleanup starts with "<p>" and cleanup ends with "</p>" and, remove leading "<p>" and trailing "</p>" from cleanup String lcCheck = cleanup.toLowerCase(); if (lcCheck.startsWith(STARTING_P_TAG) && lcCheck.endsWith(ENDING_P_TAG)) { if (lcCheck .indexOf(STARTING_P_TAG, STARTING_P_TAG.length()) == -1 && lcCheck.lastIndexOf(ENDING_P_TAG, lcCheck.length() - ENDING_P_TAG.length() - 1) == -1) { // chop off the front and end P tags cleanup = cleanup.substring(STARTING_P_TAG.length(), cleanup.length() - ENDING_P_TAG.length()) .trim(); } } } //remove the bad <br type="_moz" /> cleanup = cleanup.replace("<br type=\"_moz\" />", ""); return cleanup; } }