Title Name Parser : String « Date Type « Android

Title Name Parser

    
// Thanks to Robert Cooper for this!
//package com.totsp.bookworm.util;

import java.util.HashSet;
import java.util.Set;

/**
 * Splits a free-form personal name into title, first, middle and last names
 * and suffix, using fixed lookup tables of known titles, suffixes and
 * last-name particles.
 * <p>
 * Two input shapes are understood: natural order ("Dr. John Q Public Jr.")
 * and last-comma-first ("Public, John Q"). Lookups are case-insensitive and
 * use {@code Locale.ROOT} so the result does not depend on the default locale.
 * </p>
 *
 * @author kebernet
 */
public class NameParser {
   /** Index of the title (e.g. "Dr.") in the array returned by {@link #parseName(String)}. */
   public static final int TITLE = 0;
   /** Index of the first name in the array returned by {@link #parseName(String)}. */
   public static final int FIRST_NAME = 1;
   /** Index of the middle name(s) in the array returned by {@link #parseName(String)}. */
   public static final int MIDDLE_NAME = 2;
   /** Index of the last name in the array returned by {@link #parseName(String)}. */
   public static final int LAST_NAME = 3;
   /** Index of the suffix (e.g. "Jr.") in the array returned by {@link #parseName(String)}. */
   public static final int SUFFIX = 4;

   /** Lower-case tokens recognized as (parts of) a title. */
   private static final Set<String> TITLES = setOf("dr.", "dr", "doctor", "mr.", "mr", "mister", "ms.", "ms",
            "miss", "mrs.", "mrs", "mistress", "hn.", "hn", "honorable", "the", "his", "her", "honor", "fr", "fr.",
            "frau", "hr", "herr", "rv.", "rv", "rev.", "rev", "reverend", "madam", "lord", "lady", "sir", "senior",
            "bishop", "rabbi", "holiness", "rebbe", "deacon", "eminence", "majesty", "consul", "vice", "president",
            "ambassador", "secretary", "undersecretary", "deputy", "inspector", "ins.", "detective", "det", "det.",
            "constable", "private", "pvt.", "pvt", "petty", "p.o.", "po", "first", "class", "p.f.c.", "pfc", "lcp.",
            "lcp", "corporal", "cpl.", "cpl", "colonel", "col", "col.",
            // the historical misspellings are kept so existing inputs still match
            "capitain", "captain", "cpt.", "cpt", "ensign", "ens.", "ens", "lieutenant", "lt.", "lt", "ltc.", "ltc",
            "commander", "cmd.", "cmd", "cmdr", "rear", "radm", "r.adm.", "admiral", "adm.", "adm", "commodore",
            "general", "gen", "gen.", "ltgen", "lt.gen.", "maj.gen.", "majgen.", "major", "maj.", "mjr", "maj",
            "seargent", "sergeant", "sgt.", "sgt", "chief", "cf.", "cf", "officer", "c.p.o.", "cpo", "master",
            "cmcpo", "fltmc", "formc", "mcpo", "mcpocg", "command", "fleet", "force");

   /** Lower-case tokens recognized as (parts of) a suffix. */
   private static final Set<String> SUFFIXES = setOf("jr.", "jr", "junior", "ii", "iii", "iv", "senior", "sr.",
            "sr", // family
            "phd", "ph.d", "ph.d.", "m.d.", "md", "d.d.s.", "dds", // doctors
            "k.c.v.o", "kcvo", "o.o.c", "ooc", "o.o.a", "ooa", "g.b.e", "gbe", // knighthoods
            "k.b.e.", "kbe", "c.b.e.", "cbe", "o.b.e.", "obe", "m.b.e", "mbe", //   cont
            "esq.", "esq", "esquire", "j.d.", "jd", // lawyers
            "m.f.a.", "mfa", // misc
            "r.n.", "rn", "l.p.n.", "lpn", "l.n.p.", "lnp", // nurses
            "c.p.a.", "cpa", // money men
            "d.d.", "dd", "d.div.", "ddiv", // preachers
            "ret", "ret.");

   /** Lower-case particles that attach to the name token following them ("van", "de", ...). */
   private static final Set<String> COMPOUND_NAMES = setOf("de", "la", "st", "st.", "ste", "ste.", "saint", "van",
            "der", "al", "bin", "le", "mac", "di", "del", "vel", "von", "e'", "san", "af", "el");

   /**
    * Parses a name into title, first, middle and last names and suffix.
    * <p>
    *  Notes: "Al" is treated as a name, "al" as a name fragment. That is the
    *  only exception for capitalization.
    * </p>
    * <p>
    *  A name containing a comma is read as "last, first ..." unless every
    *  token after the first comma is a known suffix ("John Smith, Jr., MD"),
    *  in which case the commas are ignored and the name is read in natural
    *  order. Runs of whitespace count as a single separator.
    * </p>
    * @param name name to parse; may be {@code null}
    * @return String[5] indexed by {@link #TITLE}, {@link #FIRST_NAME},
    *         {@link #MIDDLE_NAME}, {@link #LAST_NAME} and {@link #SUFFIX};
    *         elements that were not found are {@code null}
    */
   public String[] parseName(String name) {
      // NOTE Add lookahead for Suffixes to support
      // "Winthrop Wolfcasts, the 31st Duke of Winchester"
      String[] result = new String[5];

      if (name == null) {
         return result;
      }

      int comma = name.indexOf(',');

      if (comma >= 0) {
         // Any further commas only separate tokens, so treat them as blanks.
         String[] afterComma = tokenize(name.substring(comma + 1).replace(',', ' '));

         if ((afterComma.length == 0) || !allSuffixes(afterComma)) {
            parseLastCommaFirst(name.substring(0, comma), afterComma, result);

            return result;
         }
      }

      parseNaturalOrder(tokenize(name.replace(',', ' ')), result);

      return result;
   }

   /** Fills {@code result} from a "last, [title] first [middle...] [suffix]" name. */
   private static void parseLastCommaFirst(String lastName, String[] rest, String[] result) {
      String last = lastName.trim();

      if (last.length() > 0) {
         result[LAST_NAME] = last;
      }

      if (rest.length == 0) {
         return; // "Smith," - nothing but a last name
      }

      if (rest.length == 1) { // easy case: the single token is the first name
         result[FIRST_NAME] = rest[0];

         return;
      }

      int head = 0;
      int tail = rest.length - 1;

      //parse titles
      while ((head <= tail) && TITLES.contains(lower(rest[head]))) {
         head++;
      }

      if (head > 0) {
         result[TITLE] = join(rest, 0, head - 1);
      }

      //parse suffixes
      while ((tail >= head) && SUFFIXES.contains(lower(rest[tail]))) {
         tail--;
      }

      if (tail < (rest.length - 1)) {
         result[SUFFIX] = join(rest, tail + 1, rest.length - 1);
      }

      if (head > tail) {
         return; // only titles and suffixes followed the comma
      }

      // The first name is any run of particles plus the token after them; the
      // bound keeps the scan inside head..tail even when the tail token is
      // itself a particle.
      int firstEnd = head;

      while ((firstEnd < tail) && isCompoundPart(rest[firstEnd])) {
         firstEnd++;
      }

      result[FIRST_NAME] = join(rest, head, firstEnd);

      // Everything left over, up to and including the tail token, is middle name.
      if (firstEnd < tail) {
         result[MIDDLE_NAME] = join(rest, firstEnd + 1, tail);
      }
   }

   /** Fills {@code result} from a "[title] first [middle...] last [suffix]" name. */
   private static void parseNaturalOrder(String[] names, String[] result) {
      if (names.length == 0) {
         return;
      }

      int head = 0;
      int tail = names.length - 1;

      //parse titles, always leaving at least one token to act as a name
      while ((head < tail) && TITLES.contains(lower(names[head]))) {
         head++;
      }

      if (head > 0) {
         result[TITLE] = join(names, 0, head - 1);
      }

      //parse suffixes, again leaving at least one token so a lone "Jr" is a name
      int end = tail;

      while ((tail > head) && SUFFIXES.contains(lower(names[tail]))) {
         tail--;
      }

      if (tail < end) {
         result[SUFFIX] = join(names, tail + 1, end);
      }

      if (head == tail) { //Only one name left
         result[FIRST_NAME] = names[head];

         return;
      }

      //parse last name: the tail token plus any particles directly before it
      int lastStart = tail;

      while ((lastStart > head) && isCompoundPart(names[lastStart - 1])) {
         lastStart--;
      }

      result[LAST_NAME] = join(names, lastStart, tail);

      int given = lastStart - 1; // index of the last given-name token

      if (given < head) {
         return; // every remaining token belonged to the last name
      }

      //parse first name: leading particles plus the token after them
      int firstEnd = head;

      while ((firstEnd < given) && isCompoundPart(names[firstEnd])) {
         firstEnd++;
      }

      result[FIRST_NAME] = join(names, head, firstEnd);

      //build middle name
      if (firstEnd < given) {
         result[MIDDLE_NAME] = join(names, firstEnd + 1, given);
      }
   }

   /** Returns true when every token is a known suffix. */
   private static boolean allSuffixes(String[] tokens) {
      for (String token : tokens) {
         if (!SUFFIXES.contains(lower(token))) {
            return false;
         }
      }

      return true;
   }

   /** Returns true for a name particle such as "van"; the capitalized "Al" counts as a name. */
   private static boolean isCompoundPart(String token) {
      return !"Al".equals(token) && COMPOUND_NAMES.contains(lower(token));
   }

   /** Lower-cases a token independently of the default locale so table lookups are stable. */
   private static String lower(String token) {
      return token.toLowerCase(java.util.Locale.ROOT);
   }

   /** Splits text on runs of whitespace; returns an empty array for blank text. */
   private static String[] tokenize(String text) {
      String trimmed = text.trim();

      if (trimmed.length() == 0) {
         return new String[0];
      }

      return trimmed.split("\\s+");
   }

   /** Joins tokens[from..to] (both inclusive) with single spaces. */
   private static String join(String[] tokens, int from, int to) {
      StringBuilder joined = new StringBuilder();

      for (int i = from; i <= to; i++) {
         if (i > from) {
            joined.append(' ');
         }

         joined.append(tokens[i]);
      }

      return joined.toString();
   }

   /** Builds a lookup set from the given values. */
   private static Set<String> setOf(String... values) {
      Set<String> set = new HashSet<String>();

      for (String value : values) {
         set.add(value);
      }

      return set;
   }
}

Related examples in the same category

1.	Split with
2.	Split first with
3.	split By Space and save result to a List
4.	Space trim
5.	truncate by length
6.	Remove all blanks
7.	Is a string a Number
8.	Random string
9.	Tokenizer. Why? Because StringTokenizer is not available in J2ME.
10.	String resource
11.	Shows creating text with links from HTML in the Java code, rather than from a string resource. Note that for a
12.	Join a collection of strings by a separator
13.	Tests if a string is blank: null, empty, or only whitespace (" ", \r\n, \t, etc)
14.	Tests if a string is numeric, i.e. contains only digit characters
15.	Writer implementation that outputs to a StringBuilder
16.	Gets the device's phone number as a String.
17.	Inspects a link Configuration through reflection API to generate a human readable String with values replaced with their constants names.
18.	Returns a String representation of the content of a android.view.Display object.
19.	Get String Element Value
20.	Join strings
21.	Find two consecutive newlines in a string.
22.	Retrieve a boolean primitive type from a String.
23.	Trim char from string
24.	Returns true if the string does not fit in standard ASCII
25.	Returns true if the given string is null or empty.
26.	4 octets in address string
27.	Add space to CSV string
28.	String fast Split
29.	Split a String by a Character, i.e. Split lines by using '\n'
30.	String Capitalizer
31.	Count char in a string
32.	Search char in a string from a starting position
33.	load String From Raw Resource
34.	Join Collection of String
35.	Padding a string, truncate a string
36.	Converts a string to title casing.
37.	reversing String
38.	load Resource To String
39.	convert Duration to String
40.	Convert string from one encoding to another
41.	Object to String and String to Object
42.	IP to String
43.	Convert string to number and convert number to string
44.	line string reader in J2ME
45.	String to Map with token
46.	Generate the client id, which is a fixed string of length 8 concatenated with 12 random bytes
47.	StringBuilder Writer
48.	Return a specific raw resource contents as a String value.
49.	Returns the ISO 8601-format String corresponding to the given duration (measured in milliseconds).
50.	Returns a string representation of the given number of nanoseconds.
51.	Simple Tokenizer
52.	split By Space
53.	Pad Front
54.	Count Occurrences
55.	Padding Left
56.	capitalize Words
57.	Tokenizer Utils
58.	Returns space padding
59.	Normalise Whitespace
60.	Removes unwanted blank characters
61.	Removes unwanted backslashes characters
62.	equals Ignore Case
63.	A method to decode/encode quoted printable encoded data
64.	Split Camel Case
65.	Split and combine by token
66.	Shorten text for display in lists etc.