Here you can find the source of normalize(String text)
public static String normalize(String text)
//package com.java2s;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Utility for escaping query text and quoting known phrases inside it.
 */
public class Main {
    /**
     * Phrases that {@link #compile(String)} wraps in double quotes. Matching is done against a
     * lower-cased copy of the text, so entries are expected to be lower case.
     */
    private final static List<String> words = new ArrayList<String>();

    /** Normalized text shorter than this many characters is passed through {@link #compile(String)}. */
    private static final int COMPILE_MAX_LENGTH = 30;

    /**
     * Trims the text, turns every whitespace/space character into a plain space, prefixes each of
     * the characters {@code + - & | ! ( ) { } [ ] ^ ~ * ? : \ = '} with a backslash, and appends a
     * closing double quote when the text contains an odd number of double quotes.
     *
     * <p>If the result is shorter than 30 characters and the phrase list is not empty, the result
     * is additionally passed through {@link #compile(String)}.
     *
     * @param text the raw text; must not be {@code null}
     * @return the escaped (and possibly phrase-quoted) text
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static String normalize(String text) {
        StringBuilder builder = new StringBuilder();
        int quote = 0;
        text = text.trim();
        for (int index = 0; index < text.length(); index++) {
            char c = text.charAt(index);
            if (Character.isWhitespace(c) || Character.isSpaceChar(c)) {
                c = ' ';
            }
            switch (c) {
                case '+':
                case '-':
                case '&':
                case '|':
                case '!':
                case '(':
                case ')':
                case '{':
                case '}':
                case '[':
                case ']':
                case '^':
                case '~':
                case '*':
                case '?':
                case ':':
                case '\\':
                case '=':
                case '\'':
                    builder.append('\\').append(c);
                    break;
                case '"':
                    // Quotes are kept as-is but counted so an unbalanced one can be closed below.
                    builder.append(c);
                    quote++;
                    break;
                default:
                    builder.append(c);
                    break;
            }
        }
        if (quote % 2 == 1) {
            builder.append('"');
        }
        if (builder.length() < COMPILE_MAX_LENGTH && words.size() > 0) {
            return compile(builder.toString());
        }
        return builder.toString();
    }

    /**
     * Wraps every whole-word, case-insensitive occurrence of each configured phrase in double
     * quotes. The quoted replacement uses the phrase as stored in the list, not the casing found
     * in the text.
     *
     * @param text the text to process
     * @return the text with matching phrases quoted
     */
    static String compile(String text) {
        StringBuilder builder = new StringBuilder(text);
        // Lower-cased shadow copy used for searching; it must stay index-aligned with builder.
        StringBuilder lower = new StringBuilder(lowerCasePerChar(text));
        for (String key : words) {
            replace(builder, lower, key);
        }
        return builder.toString();
    }

    /**
     * Replaces each occurrence of {@code key} in {@code builder} that is not directly preceded or
     * followed by a letter or digit with {@code "key"}, applying the same edit to {@code lower}
     * so both buffers keep identical indices.
     *
     * @param builder the text being rewritten
     * @param lower   lower-cased copy of {@code builder}, same length
     * @param key     the phrase to quote; {@code null} or empty keys are ignored
     */
    private static void replace(StringBuilder builder, StringBuilder lower, String key) {
        // An empty key matches at every position and would grow the buffers forever.
        if (key == null || key.length() == 0) {
            return;
        }
        String value = "\"" + key + "\"";
        String lowerValue = lowerCasePerChar(value);
        int index = lower.indexOf(key);
        while (index > -1) {
            int end = index + key.length();
            boolean precededByWordChar =
                    index > 0 && Character.isLetterOrDigit(lower.charAt(index - 1));
            // end == lower.length() means the match reaches the end of the text.
            boolean followedByWordChar =
                    end < lower.length() && Character.isLetterOrDigit(lower.charAt(end));
            if (precededByWordChar || followedByWordChar) {
                // Not a whole word: resume right after this position so no later match is skipped.
                index = lower.indexOf(key, index + 1);
                continue;
            }
            builder.replace(index, end, value);
            lower.replace(index, end, lowerValue);
            // Continue after the inserted text so the fresh quotes are not rescanned.
            index = lower.indexOf(key, index + value.length());
        }
    }

    /**
     * Lower-cases a string one {@code char} at a time. Unlike {@link String#toLowerCase()} this is
     * locale-independent and never changes the length, which keeps positions in the result valid
     * for the original string.
     */
    private static String lowerCasePerChar(String text) {
        char[] chars = text.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            chars[i] = Character.toLowerCase(chars[i]);
        }
        return new String(chars);
    }
}