org.comicwiki.transforms.ComicCharacterTransform.java Source code

Java tutorial

Introduction

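Here is the source code for the class org.comicwiki.transforms.ComicCharacterTransform (file ComicCharacterTransform.java), a repository transform that derives honorifics, gender, and given/family names from comic character names.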

Source

/*******************************************************************************
 * See the NOTICE file distributed with this work for additional 
 * information regarding copyright ownership. ComicGenie licenses this 
 * file to you under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License.  
 * You may obtain a copy of the License at
 *  
 *     http://www.apache.org/licenses/LICENSE-2.0
 *  
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package org.comicwiki.transforms;

import static com.google.common.base.Preconditions.checkNotNull;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.comicwiki.HonorificExpander;
import org.comicwiki.PersonNameMatcher;
import org.comicwiki.Repositories;
import org.comicwiki.RepositoryTransform;
import org.comicwiki.model.schema.Person;

import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Iterables;

/**
 * Repository transform that parses the display name of every comic character
 * into structured {@link Person} fields: honorific prefix/suffix, gender,
 * given name and family name.
 *
 * <p>
 * The parsing is heuristic. Honorifics and gender hints are detected with the
 * regular expressions declared below; first/last name recognition is delegated
 * to the supplied {@link PersonNameMatcher}.
 */
public final class ComicCharacterTransform implements RepositoryTransform {
    /** Matches names containing "girl" or "woman" anywhere (case-insensitive). */
    private static final Pattern IMPLIED_FEMALE = Pattern.compile(".*(girl|woman).*", Pattern.CASE_INSENSITIVE);

    /**
     * Matches names that end in the stand-alone word "man" (for example
     * "Spider-Man"). Because of the leading word boundary, "man" glued to other
     * letters (for example "Batman") does not match.
     */
    private static final Pattern IMPLIED_MALE = Pattern.compile(".*\\b(man)\\b", Pattern.CASE_INSENSITIVE);

    /** Matches a middle initial: exactly one word character followed by a period. */
    private static final Pattern MIDDLE_INITIAL = Pattern.compile("\\w{1}(\\.){1}", Pattern.CASE_INSENSITIVE);

    /**
     * Female honorific at the start of the name. Group 1 is the honorific, group
     * 2 the separator (space or period), group 3 the remainder of the name.
     */
    private static final Pattern PREFIX_FEMALE = Pattern.compile(
            "^(mrs|miss|ms|lady|princess|queen|dame|madam|ma'am|duchess|viscountess|countess|baroness|aunt)( |\\.){1}(.*)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Male honorific at the start of the name. Group 1 is the honorific, group 2
     * the separator (space or period), group 3 the remainder of the name. The
     * historical misspelling "shiek" is kept next to the correct spellings so
     * that names which matched before still match.
     */
    private static final Pattern PREFIX_MALE = Pattern.compile(
            "^(mr|sir|lord|master|king|prince|count|duke|baron|viscount|knight|earl|emperor|uncle|sheikh|sheik|shiek)( |\\.){1}(.*)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Gender-neutral title at the start of the name. Group 1 is the title
     * together with its separator, group 2 the bare title, group 3 the separator
     * and group 4 the remainder of the name.
     */
    private static final Pattern PREFIX_NEUTRAL = Pattern.compile(
            "(^(ambassador|private|constable|senator|corporal|mayor|cap'n|professor|president|inspector|detective|doctor|doc|dr|sgt|col|captain|major|maj|officer|general|gen|prof|capt|sergeant|sheriff|lieutenant|lt)( |\\.){1})(.*)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Male suffix at the end of the name, optionally followed by periods. Group
     * 1 is everything before the suffix, group 2 is the suffix itself.
     */
    private static final Pattern SUFFIX_MALE = Pattern.compile("(.*){1}(esq|esquire|caesar)\\.*$",
            Pattern.CASE_INSENSITIVE);

    /**
     * Fills in the family name for two-token names ("First Last") and for
     * three-token names whose middle token is an initial ("First M. Last").
     * Other shapes are left untouched.
     */
    private static void extractNames(Person person, String[] nameTokens) {
        if (firstAndLastName(nameTokens)) {
            person.familyName = nameTokens[1];
        } else if (nameTokens.length == 3 && MIDDLE_INITIAL.matcher(nameTokens[1]).matches()) {
            person.familyName = nameTokens[2];
        }
    }

    /** Returns {@code true} when the name consists of exactly two tokens. */
    private static boolean firstAndLastName(String[] tokens) {
        return tokens.length == 2;
    }

    /** Returns {@code true} when the name consists of exactly one token. */
    private static boolean oneName(String[] tokens) {
        return tokens.length == 1;
    }

    /**
     * For single-token names only: stores the token as the given name when it
     * was recognised as a gendered first name, otherwise as the family name.
     *
     * @param hasGender
     *            whether the single token was recognised as a first name of the
     *            relevant gender
     */
    private static void oneNameWithGender(Person person, String[] nameTokens, boolean hasGender) {
        if (!oneName(nameTokens)) {
            return;
        }
        if (hasGender) {
            person.givenName = nameTokens[0];
        } else {
            person.familyName = nameTokens[0];
        }
    }

    /** Splits on single spaces, trimming each token and dropping empty ones. */
    private static String[] tokenize(String text) {
        return Iterables.toArray(Splitter.on(' ').trimResults().omitEmptyStrings().split(text), String.class);
    }

    private final PersonNameMatcher personMatcher;

    private final Repositories repositories;

    /**
     * @param personMatcher
     *            used to recognise male/female first names and last names
     * @param repositories
     *            provides the comic character cache that {@link #transform()}
     *            iterates over
     */
    public ComicCharacterTransform(PersonNameMatcher personMatcher, Repositories repositories) {
        this.personMatcher = personMatcher;
        this.repositories = repositories;
    }

    /**
     * Strips recognised honorifics from a name. The checks run in a fixed order,
     * each against the result of the previous one: neutral prefix, male suffix,
     * male prefix, female prefix. Each kind is removed at most once.
     *
     * @param name
     *            the raw character name; must not be {@code null}
     * @return the name without the recognised honorifics, trimmed
     * @throws NullPointerException
     *             if {@code name} is {@code null}
     */
    protected String removePrefixAndSuffix(String name) {
        checkNotNull(name, "name");
        String s = name;
        Matcher m = PREFIX_NEUTRAL.matcher(s);
        if (m.matches()) {
            // The last group of every prefix pattern is the remainder of the name.
            s = m.group(m.groupCount());
        }

        m = SUFFIX_MALE.matcher(s);
        if (m.matches()) {
            // Second-to-last group (group 1) is the text in front of the suffix.
            s = m.group(m.groupCount() - 1);
        }

        m = PREFIX_MALE.matcher(s);
        if (m.matches()) {
            s = m.group(m.groupCount());
        }

        m = PREFIX_FEMALE.matcher(s);
        if (m.matches()) {
            s = m.group(m.groupCount());
        }
        return s.trim();
    }

    /**
     * Walks every character in the comic character cache and populates its
     * honorific, gender, given name and family name from {@code person.name}.
     * Characters with a null/empty name, or whose name is empty once honorifics
     * are stripped, are skipped.
     */
    @Override
    public void transform() throws IOException {
        for (Person person : repositories.COMIC_CHARACTERS.cache.values()) {
            if (Strings.isNullOrEmpty(person.name)) {
                continue;
            }

            String[] nameTokens = tokenize(removePrefixAndSuffix(person.name));
            if (nameTokens.length == 0) {
                continue;
            }

            applyHonorificAndGender(person, nameTokens);

            if (nameTokens.length > 1) {
                resolveMultiTokenName(person, nameTokens);
            }
        }
    }

    /**
     * Sets the honorific and gender from the first rule that applies, in this
     * priority: male prefix, male suffix, female prefix, neutral prefix,
     * female (first name or implied), male (first name or implied), and finally
     * a lone token recognised as a last name. Single-token names also get their
     * given or family name assigned here.
     */
    private void applyHonorificAndGender(Person person, String[] nameTokens) {
        // Honorifics are detected on the raw name; nameTokens has them stripped.
        Matcher prefixMaleMatcher = PREFIX_MALE.matcher(person.name);
        Matcher suffixMaleMatcher = SUFFIX_MALE.matcher(person.name);
        Matcher prefixFemaleMatcher = PREFIX_FEMALE.matcher(person.name);
        Matcher prefixNeutralMatcher = PREFIX_NEUTRAL.matcher(person.name);

        boolean isImpliedMale = IMPLIED_MALE.matcher(person.name).matches();
        boolean isImpliedFemale = IMPLIED_FEMALE.matcher(person.name).matches();

        // A token recognised as a male first name is never also treated as female.
        boolean isMale = personMatcher.isMaleName(nameTokens[0]);
        boolean isFemale = !isMale && personMatcher.isFemaleName(nameTokens[0]);

        if (prefixMaleMatcher.matches()) {
            person.honorificPrefix = prefixMaleMatcher.group(1).trim();
            person.makeMale();
            oneNameWithGender(person, nameTokens, isMale);
        } else if (suffixMaleMatcher.matches()) {
            // Group 2 is the suffix itself; group 1 is the name in front of it.
            person.honorificSuffix = suffixMaleMatcher.group(2).trim();
            person.makeMale();
            oneNameWithGender(person, nameTokens, isMale);
        } else if (prefixFemaleMatcher.matches()) {
            person.honorificPrefix = prefixFemaleMatcher.group(1).trim();
            person.makeFemale();
            oneNameWithGender(person, nameTokens, isFemale);
        } else if (prefixNeutralMatcher.matches()) {
            // A neutral title says nothing about gender, so none is assigned.
            person.honorificPrefix = HonorificExpander.expand(prefixNeutralMatcher.group(1).trim());
            if (oneName(nameTokens)) {
                if (isFemale || isMale) {
                    person.givenName = nameTokens[0];
                } else if (personMatcher.isLastName(nameTokens[0])) {
                    person.familyName = nameTokens[0];
                }
            }
        } else if (isFemale || isImpliedFemale) {
            // Female is tested first, so a name implying both resolves to female.
            person.makeFemale();
            if (oneName(nameTokens)) {
                person.givenName = nameTokens[0];
            }
        } else if (isMale || isImpliedMale) {
            person.makeMale();
            if (oneName(nameTokens)) {
                person.givenName = nameTokens[0];
            }
        } else if (oneName(nameTokens)) {
            // One name and no gender hint: keep it only if it is a known last name.
            Matcher lastNamesMatcher = personMatcher.lastNames(nameTokens[0]);
            if (lastNamesMatcher.matches()) {
                person.familyName = nameTokens[0];
            }
        }
    }

    /**
     * Assigns given and family names for names with two or more tokens, using
     * the gender already stored on the person. The gender is compared against
     * the literals "M" and "F" (presumably the values written by
     * {@code makeMale()} / {@code makeFemale()}; confirm against {@link Person}).
     */
    private void resolveMultiTokenName(Person person, String[] nameTokens) {
        Matcher maleFirstName = personMatcher.maleNames(nameTokens[0]);
        if ("M".equals(person.gender) && maleFirstName.matches()) {
            person.givenName = maleFirstName.group(1);
            extractNames(person, nameTokens);
        }

        Matcher femaleFirstName = personMatcher.femaleNames(nameTokens[0]);
        if ("F".equals(person.gender) && femaleFirstName.matches()) {
            person.givenName = femaleFirstName.group(1);
            extractNames(person, nameTokens);
        }

        Matcher lastNamesMatcher = personMatcher.lastNames(nameTokens[nameTokens.length - 1]);
        if (!lastNamesMatcher.matches()) {
            return;
        }
        person.familyName = lastNamesMatcher.group(1);

        // Three tokens ending in a known last name, with no given name found yet.
        if (nameTokens.length == 3 && Strings.isNullOrEmpty(person.givenName)) {
            if (MIDDLE_INITIAL.matcher(nameTokens[1]).matches()) {
                person.givenName = nameTokens[0];
            } else {
                // No middle initial: the middle token is taken as the given name
                // and, when it is a known first name, also decides the gender.
                person.givenName = nameTokens[1];
                Matcher maleMiddleName = personMatcher.maleNames(nameTokens[1]);
                Matcher femaleMiddleName = personMatcher.femaleNames(nameTokens[1]);
                if (maleMiddleName.matches()) {
                    person.makeMale();
                } else if (femaleMiddleName.matches()) {
                    person.makeFemale();
                }
            }
        }
    }
}