com.ikanow.infinit.e.harvest.enrichment.legacy.alchemyapi.AlchemyEntityGeoCleanser.java Source code

Java tutorial

Introduction

Here is the source code for com.ikanow.infinit.e.harvest.enrichment.legacy.alchemyapi.AlchemyEntityGeoCleanser.java

Source

/*******************************************************************************
 * Copyright 2012, The Infinit.e Open Source Project.
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 ******************************************************************************/
package com.ikanow.infinit.e.harvest.enrichment.legacy.alchemyapi;

import java.lang.reflect.Type;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.WordUtils;

import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoException;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import com.ikanow.infinit.e.data_model.store.MongoDbManager;
import com.ikanow.infinit.e.data_model.store.document.DocumentPojo;
import com.ikanow.infinit.e.data_model.store.document.EntityPojo;
import com.ikanow.infinit.e.data_model.store.feature.geo.GeoFeaturePojo;

//______________________________________________________________________________________

/**
 * Re-examines the location ("Where" dimension) entities of a document and, where the
 * disambiguation to a US town looks dubious, replaces it with a better supported
 * alternative taken from the geo-reference collection.
 *
 * <p>Typical usage is either {@link #doProcessing(int, BasicDBObject, int, boolean)} to run
 * over documents stored in MongoDB, or {@link #initialize()} followed by
 * {@link #cleanseGeoInDocu(DocumentPojo)} for documents already held in memory.
 *
 * <p>Instances keep plain (unsynchronized) counters and DB handles, so a single instance
 * should not be shared between threads without external synchronization.
 */
public class AlchemyEntityGeoCleanser {

    // Stats:
    private int _nDocusModified = 0;
    private int _nDocusProcessed = 0;
    private int _nStayedWithOriginal = 0;
    private int _nMovedToRegion = 0;
    private int _nMovedToLargeCity = 0;
    private int _nMovedToForeignCity = 0;

    /** @return number of documents for which at least one entity was changed by {@link #doProcessing} */
    public int getDocusModified() {
        return _nDocusModified;
    }

    /** @return number of documents examined by {@link #doProcessing} */
    public int getDocusProcessed() {
        return _nDocusProcessed;
    }

    /** @return number of dubious entities that were examined against alternatives but left unchanged */
    public int getStayedWithOriginal() {
        return _nStayedWithOriginal;
    }

    /** @return number of entities re-pointed at a region */
    public int getMovedToRegion() {
        return _nMovedToRegion;
    }

    /** @return number of entities re-pointed at a large city (population >= 400000) */
    public int getMovedToLargeCity() {
        return _nMovedToLargeCity;
    }

    /** @return number of entities re-pointed at another (smaller) city */
    public int getMovedToForeignCity() {
        return _nMovedToForeignCity;
    }

    // Debug:
    private int _nDebugLevel = 0;

    /**
     * Sets the verbosity of the diagnostics written to {@code System.out}.
     *
     * @param nDebugLevel 0=silent, 1=replacements, 2=feeds/candidate entities, 3=entities, 4=decomposition
     */
    public void setDebugLevel(int nDebugLevel) { //1==replacements, 2=feeds/candidate entities, 3=entities, 4=decomposition
        _nDebugLevel = nDebugLevel;
    }

    //______________________________________________________________________________________

    // Processing code
    //______________________________________________________________________________________

    /**
     * Top level logic for running against the document collection in MongoDB.
     * (For cleaning documents already in memory, call {@link #initialize()} once and then
     * {@link #cleanseGeoInDocu(DocumentPojo)} for each document.)
     *
     * @param nSkip     number of matching documents to skip before processing starts
     * @param userQuery lets the caller decide what data to run on (probably for debugging);
     *                  {@code null} means "no filter" (an empty query)
     * @param nLimit    the max number of documents returned (for debugging); values <= 0 mean no limit
     * @param bAlterDB  if true, writes the modified entity lists back to the DB (else it's just for debugging)
     * @throws NumberFormatException propagated from {@link #initialize()}
     * @throws UnknownHostException  propagated from {@link #initialize()}
     * @throws MongoException        on MongoDB errors
     */
    public void doProcessing(int nSkip, BasicDBObject userQuery, int nLimit, boolean bAlterDB)
            throws NumberFormatException, UnknownHostException, MongoException {

        // Initialization (DB handles)
        this.initialize();

        // Launch MongoDB query

        BasicDBObject query = userQuery;
        if (null == query) {
            // (The empty query object used to be created and then thrown away, leaving query null)
            query = new BasicDBObject();
        }

        // Just get the entity list out to save a few CPU cycles
        BasicDBObject outFields = new BasicDBObject();
        outFields.append(DocumentPojo.entities_, 1);
        outFields.append(DocumentPojo.url_, 1); // (help with debugging)
        outFields.append(DocumentPojo.title_, 1); // (help with debugging) 

        DBCursor dbc = null;
        if (nLimit > 0) {
            dbc = _docsDB.find(query, outFields).limit(nLimit).skip(nSkip);
        } else { // Everything!
            dbc = _docsDB.find(query, outFields).skip(nSkip);
        }

        // Create POJO array of documents (definitely not the most efficient, but 
        // will make integration with the harvester easier)

        List<DocumentPojo> docus = DocumentPojo.listFromDb(dbc, DocumentPojo.listType());

        // Loop over array and invoke the cleansing function for each one

        for (DocumentPojo docu : docus) {
            if (this.cleanseGeoInDocu(docu)) {
                this._nDocusModified++;

                if (bAlterDB) {

                    BasicDBObject inner0 = new BasicDBObject(DocumentPojo.entities_,
                            (DBObject) com.mongodb.util.JSON.parse(new Gson().toJson(docu.getEntities())));
                    BasicDBObject inner1 = new BasicDBObject(MongoDbManager.set_, inner0);

                    // Overwrite the existing entities list with the new one 
                    _docsDB.update(new BasicDBObject(DocumentPojo._id_, docu.getId()), inner1, false, true);
                    // (need the multi-update in case _id isn't the shard key - documentation claims this is not necessary but 2.4.6/shell still enforces it)

                } //TESTED
            }
            this._nDocusProcessed++;
        }
    }
    //________________________________________________   

    // Initialization variables

    private DBCollection _docsDB = null;
    private DBCollection _georefDB = null;

    // Full names of US states/territories, used to spot "<town>, <US state>" disambiguations
    private static final String _stateList = "Alabama|Alaska|American Samoa|Arizona|Arkansas|California|Colorado|Connecticut|Delaware|D\\.C\\.|District of Columbia|Florida|Georgia|Guam|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|Mississippi|Missouri|Montana|Nebraska|Nevada|New Hampshire|New Jersey|New Mexico|New York|North Carolina|North Dakota|Northern Marianas Islands|Ohio|Oklahoma|Oregon|Pennsylvania|Puerto Rico|Rhode Island|South Carolina|South Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Virgin Islands|Washington|West Virginia|Wisconsin|Wyoming";
    // (compiled once; Pattern is immutable so it can be shared by all instances)
    private static final Pattern _statesRegex = Pattern.compile(_stateList);

    // Lower-case state abbreviations understood by getStateFromAbbr, with an optional trailing "."
    // ("n\\.m": the dot is escaped like its siblings, so it no longer matches arbitrary characters)
    private static final String _abbrStateList = "(?:m\\.d|n\\.j|n\\.m|conn|mich|al\\.|d\\.c|vt|calif|wash\\.|ore\\.|ind\\.)\\.?";
    private static final Pattern _abbrStateRegex = Pattern.compile(_abbrStateList);

    //________________________________________________

    /**
     * Initialization code: obtains the document metadata and geo feature collections from
     * {@link MongoDbManager}. Must be called before {@link #cleanseGeoInDocu(DocumentPojo)}
     * ({@link #doProcessing} calls it itself).
     *
     * @throws NumberFormatException declared for backwards compatibility with existing callers
     * @throws UnknownHostException  declared for backwards compatibility with existing callers
     * @throws MongoException        on MongoDB errors
     */
    public void initialize() throws NumberFormatException, UnknownHostException, MongoException {

        // MongoDB      
        _docsDB = MongoDbManager.getDocument().getMetadata();
        _georefDB = MongoDbManager.getFeature().getGeo();

        // (The US state regexes are static constants, nothing to do for them here)
    }
    //________________________________________________

    // Inner loop processing logic

    /**
     * A location entity whose US disambiguation is in doubt, together with the alternative
     * geo-reference entries that share its name.
     */
    public static class Candidate {
        /** The entity as currently disambiguated (the "sitting tenant") */
        EntityPojo entity;
        /** Alternative geo-reference entries found for the entity's actual name */
        LinkedList<GeoFeaturePojo> candidates;
        /** Lower-cased state part of the current disambiguation */
        String state;

        Candidate(EntityPojo ent, LinkedList<GeoFeaturePojo> cands, String st) {
            entity = ent;
            candidates = cands;
            state = st;
        }
    }

    /**
     * Checks every "Where" entity of the document and rewrites the disambiguated name, index and
     * semantic links of those whose disambiguation to a US town is beaten by an alternative
     * from the geo-reference collection.
     *
     * <p>Requires {@link #initialize()} to have been called (the geo-reference collection is queried).
     *
     * @param doc the document to cleanse; its entities are modified in place
     * @return true if at least one entity was changed
     */
    public boolean cleanseGeoInDocu(DocumentPojo doc) {

        boolean bChangedAnything = false;

        // Entities that need a second look, keyed by entity index
        Map<String, Candidate> dubiousLocations = new HashMap<String, Candidate>();

        // Context gathered from the other location entities in the document (all lower case):
        Set<String> otherRegions = new HashSet<String>(); // states/regions mentioned directly
        Set<String> otherCountries = new HashSet<String>(); // countries mentioned directly
        Set<String> otherCountriesOrRegionsReferenced = new HashSet<String>(); // implied by other entities

        //Debug
        if (_nDebugLevel >= 2) {
            // (entities can be null, see the check below - don't let the debug output throw)
            int nEntities = (null != doc.getEntities()) ? doc.getEntities().size() : 0;
            System.out.println("+++++++ Doc: " + doc.getTitle() + " / " + doc.getId() + " / " + nEntities);
        }

        // 1] First off, let's find anything location-based and also determine if it's bad or not 

        if (null != doc.getEntities()) {
            for (EntityPojo ent : doc.getEntities()) {

                boolean bStrongCandidate = false;

                // Locations only
                if (EntityPojo.Dimension.Where == ent.getDimension()) {

                    // So locations get disambiguated to one of:
                    // "<city-etc>, <region-or-country>", or "<region-or-country>"
                    // though can also just be left as they are.

                    String sActualName = ent.getActual_name().toLowerCase();
                    if (!ent.getDisambiguatedName().toLowerCase().equals(sActualName)) {
                        // It's been disambiguated

                        //Debug
                        if (_nDebugLevel >= 3) {
                            System.out.println("disambiguous candidate: " + ent.getDisambiguatedName() + " VS "
                                    + ent.getActual_name() + " ("
                                    + ((null != ent.getSemanticLinks()) ? ent.getSemanticLinks().size() : 0) + ")");
                        }

                        // OK next step, is it a disambiguation to a US town?
                        String splitMe[] = ent.getDisambiguatedName().split(", ");
                        if (2 == splitMe.length) {

                            String stateOrCountry = splitMe[1];
                            Matcher m = _statesRegex.matcher(stateOrCountry);
                            if (m.find()) { // This is a US disambiguation - high risk case
                                // Short cut if state is already directly mentioned?
                                stateOrCountry = stateOrCountry.toLowerCase();

                                if (!otherRegions.contains(stateOrCountry)) { // See list below - no need to go any further

                                    // OK next step - is it a possible ambiguity:
                                    // same search name AND (outside the US OR population >= 400000)
                                    ArrayList<BasicDBObject> x = new ArrayList<BasicDBObject>();
                                    BasicDBObject inner0_0 = new BasicDBObject(MongoDbManager.not_,
                                            Pattern.compile("US"));
                                    BasicDBObject inner1_0 = new BasicDBObject("country_code", inner0_0);
                                    x.add(inner1_0);

                                    BasicDBObject inner0_1 = new BasicDBObject(MongoDbManager.gte_, 400000);
                                    BasicDBObject inner1_1 = new BasicDBObject("population", inner0_1);
                                    x.add(inner1_1);

                                    BasicDBObject dbo = new BasicDBObject();
                                    dbo.append("search_field", sActualName);
                                    dbo.append(MongoDbManager.or_, x);

                                    // Fetch the matches once and reuse the list, rather than asking the
                                    // DB separately for the size, the contents and the count
                                    List<DBObject> matches = _georefDB.find(dbo).toArray();
                                    if (!matches.isEmpty()) { // Problems!

                                        //Create list of candidates

                                        Type listType = new TypeToken<LinkedList<GeoFeaturePojo>>() {
                                        }.getType();
                                        LinkedList<GeoFeaturePojo> grpl = new Gson().fromJson(matches.toString(),
                                                listType);

                                        //Debug
                                        if (_nDebugLevel >= 2) {
                                            System.out.println("\tERROR CANDIDATE: " + ent.getDisambiguatedName()
                                                    + " VS " + ent.getActual_name() + " (" + matches.size() + ")");

                                            if (_nDebugLevel >= 3) {
                                                for (GeoFeaturePojo grp : grpl) {
                                                    System.out.println("\t\tCandidate:" + grp.getCity() + " / "
                                                            + grp.getRegion() + " / " + grp.getCountry());
                                                }
                                            }
                                        }

                                        Candidate candidate = new Candidate(ent, grpl, stateOrCountry);
                                        dubiousLocations.put(ent.getIndex(), candidate);
                                        bStrongCandidate = true;

                                    } // if strong candidate
                                } //TESTED ("reston, virginia" after "virginia/stateorcounty" mention)
                                  // (end if can't shortcut past all this)

                            } // end if a US town
                        } // end if in the format "A, B"

                    } // if weak candidate
                      //TESTED

                    if (!bStrongCandidate) { // Obv can't count on a disambiguous candidate:               
                        // Anything not under suspicion contributes context for judging the others
                        String type = ent.getType().toLowerCase();

                        if (type.equals("stateorcounty")) {
                            String disName = ent.getDisambiguatedName().toLowerCase();
                            if (_abbrStateRegex.matcher(disName).matches()) {
                                otherRegions.add(getStateFromAbbr(disName));
                            } else {
                                otherRegions.add(ent.getDisambiguatedName().toLowerCase());
                            }
                            otherCountriesOrRegionsReferenced.add("united states");
                        } //TESTED: "mich./stateorcounty"
                        else if (type.equals("country")) {
                            String disName = ent.getDisambiguatedName().toLowerCase();

                            // Translation of known badly transcribed countries:
                            // (England->UK)
                            if (disName.equals("england")) {
                                otherCountries.add("united kingdom");
                            } //TESTED
                            else {
                                otherCountries.add(ent.getDisambiguatedName().toLowerCase());
                            }
                        } else if (type.equals("region")) {
                            otherRegions.add(ent.getDisambiguatedName().toLowerCase());
                        } else if (type.equals("city")) {
                            String splitMe[] = ent.getDisambiguatedName().split(",\\s*");
                            if (2 == splitMe.length) {
                                otherCountriesOrRegionsReferenced.add(splitMe[1].toLowerCase());
                                if (_statesRegex.matcher(splitMe[1]).find()) {
                                    otherCountriesOrRegionsReferenced.add("united states");
                                } //TESTED: "lexingon, kentucky/city"
                            }
                        }
                    } //TESTED: just above clauses

                } // if location

            } // (end loop over entities)
        }

        // Debug:
        if ((_nDebugLevel >= 3) && (!dubiousLocations.isEmpty())) {
            for (String s : otherRegions) {
                System.out.println("Strong region: " + s);
            }
            for (String s : otherCountries) {
                System.out.println("Strong countries: " + s);
            }
            for (String s : otherCountriesOrRegionsReferenced) {
                System.out.println("Weak regionscountries: " + s);
            }
        }

        // 2] The requirements and algorithm are discussed in 
        // http://ikanow.jira.com/wiki/display/INF/Beta...+improving+AlchemyAPI+extraction+%28geo%29
        // Canonical cases:
        // Darfur -> Darfur, MN even though Sudan and sometimes Darfur, Sudan are present
        // Shanghai -> Shanghai, WV even though China is mentioned (and not WV)
        // Manchester -> Manchester village, NY (not Manchester, UK)
        // Philadelphia -> Philadelphia (village), NY (though NY is mentioned and not PA) 

        // We're generating the following order (lower number wins)
        //       10] Sitting tenant with strong direct
        //       15] Large city with strong direct      
        //       20] Region with direct
        //       30] Large city with strong indirect
        //       40] Sitting tenant with strong indirect 
        //       50] Region with indirect
        //       60] Another foreign possibility with strong direct 
        //       70] Large city with weak direct
        //       72] Large city with weak indirect
        //       75] Large city with no reference 
        //       78] Another foreign possibility with strong indirect (>100K population - ie not insignificant) 
        //       80] Sitting tenant with any weak (US) direct or indirect 
        //       90] Another foreign possibility with strong indirect 
        //      100] Another foreign possibility with weak direct 
        //      110] Another foreign possibility with weak indirect 
        //      120] Region with no reference, if there is only 1
        //      130] Sitting tenant with none of the above (ie default)
        //      140] Anything else!

        for (Map.Entry<String, Candidate> pair : dubiousLocations.entrySet()) {
            EntityPojo ent = pair.getValue().entity;
            Candidate candidate = pair.getValue();

            // 2.1] Let's analyse the "sitting tenant"

            int nPrio = 130;
            GeoFeaturePojo currLeader = null;
            int nCase = 0; // (just for debugging, 0=st, 1=large city, 2=region, 3=other)

            if (otherRegions.contains(candidate.state)) { // Strong direct ref, winner!
                nPrio = 10; // winner!
            } //TESTED: "san antonio, texas/city" vs "texas"
            else if (otherCountriesOrRegionsReferenced.contains(candidate.state)) {
                // Indirect ref
                nPrio = 40; // good, but beatable...
            } //TESTED: "philadelphia (village), new york/city" 
            else if (otherCountries.contains("united states")) { // Weak direct ref
                nPrio = 80; // better than nothing...            
            } //TESTED: "apache, oklahoma/city"
            else if (otherCountriesOrRegionsReferenced.contains("united states")) { // Weak indirect ref
                nPrio = 80; // better than nothing...            
            } //TESTED: "washington, d.c." have DC as stateorcounty, but US in countries list

            // Special case: we don't like "village":
            // (Only ever demote - the previous "80 != nPrio" test also *promoted* a default-scored (130)
            //  village to 80, the opposite of the intended downvote)
            if ((nPrio < 80) && ent.getDisambiguatedName().contains("village")
                    && !ent.getActual_name().contains("village")) {
                nPrio = 80;
            } //TESTED: "Downvoted: Philadelphia (village), New York from Philadelphia"

            // Debug
            if (_nDebugLevel >= 2) {
                System.out.println(pair.getKey() + " SittingTenantScore=" + nPrio);
            }

            // Alternatives
            if (nPrio > 10) {

                LinkedList<GeoFeaturePojo> geos = pair.getValue().candidates;
                for (GeoFeaturePojo geo : geos) {

                    int nAltPrio = 140;
                    int nAltCase = -1;
                    String city = (null != geo.getCity()) ? geo.getCity().toLowerCase() : null;
                    String region = (null != geo.getRegion()) ? geo.getRegion().toLowerCase() : null;
                    String country = (null != geo.getCountry()) ? geo.getCountry().toLowerCase() : null;

                    // 2.2] CASE 1: I'm a city with pop >= 400K (best score 15)
                    //                15] Large city with strong direct      
                    //                30] Large city with strong indirect
                    //                70] Large city with weak direct
                    //                72] Large city with weak indirect
                    //                75] Large city with no reference                

                    if ((null != city) && (geo.getPopulation() >= 400000) && (nPrio > 15)) {
                        nAltCase = 1;

                        if ((null != region) && (otherRegions.contains(region))) {
                            nAltPrio = 15; // strong direct
                        } //TESTED: "dallas / Texas / United States = 15"
                        else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
                            nAltPrio = 30; // strong indirect
                        } //TESTED: "sacramento / California / United State"
                        else if ((null != country) && (otherCountries.contains(country))) {
                            nAltPrio = 70; // weak direct 
                        } //TESTED: "berlin, germany", with "germany" directly mentioned
                        else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                            nAltPrio = 72; // weak indirect 
                        } //TESTED: "los angeles / California / United States = 72"
                        else {
                            nAltPrio = 75; // just for being big!
                        } //TESTED: "barcelona, spain"
                    }

                    // 2.3] CASE 2: I'm a region (best score=20, can beat current score)
                    //                20] Region with direct
                    //                50] Region with indirect
                    //               120] Region with no reference, if there is only 1

                    else if ((null == city) && (nPrio > 20)) {
                        nAltCase = 2;

                        if ((null != country) && (otherCountries.contains(country))) {
                            nAltPrio = 20; // strong direct 
                        } //TESTED: (region) "Berlin, Germany" with "Germany" mentioned
                        else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                            nAltPrio = 50; // strong indirect 
                        } //(haven't seen, but we'll live)
                        else {
                            nAltPrio = 120; // (just for being there)
                        } //TESTED: "null / Portland / Jamaica = 120", also "Shanghai / China"
                    }

                    // 2.4] CASE 3: I'm any foreign possibility (best score=60)
                    //                60] Another foreign possibility with strong direct 
                    //                78] Another foreign possibility with strong indirect (>100K population - ie not insignificant) 
                    //                90] Another foreign possibility with strong indirect 
                    //               100] Another foreign possibility with weak direct 
                    //               110] Another foreign possibility with weak indirect 

                    else if (nPrio > 60) {
                        nAltCase = 3;

                        if ((null != region) && (otherRegions.contains(region))) {
                            nAltPrio = 60; // strong direct

                            // Double check we're not falling into the trap below:
                            // (constant-first comparison: a missing country code counts as "not US")
                            if (!"US".equals(geo.getCountry_code())) {
                                Matcher m = _statesRegex.matcher(geo.getRegion());
                                if (m.matches()) { // non US state matching against (probably) US state, disregard)
                                    nAltPrio = 140;
                                }
                            } //TESTED (same clause as below)

                        } //TESTED: lol "philadelphia / Maryland / Liberia = 60" (before above extra clause)

                        if (nAltPrio > 60) { // (may need to re-run test)
                            if ((null != country) && (otherCountries.contains(country))) {
                                if (geo.getPopulation() < 100000) {
                                    nAltPrio = 90; // strong indirect
                                } //TESTED: "washington / Villa Clara / Cuba"
                                else {
                                    nAltPrio = 78; // strong indirect, with boost!                        
                                } //TESTED: "geneva, Geneve, Switzerland", pop 180K
                            } else if ((null != region) && (otherCountriesOrRegionsReferenced.contains(region))) {
                                nAltPrio = 100; // weak direct
                            } //TESTED: "lincoln / Lincolnshire / United Kingdom = 100"
                            else if ((null != country) && (otherCountriesOrRegionsReferenced.contains(country))) {
                                nAltPrio = 110; // weak indirect
                            } //(haven't seen, but we'll live)                  
                        }
                    }
                    // Debug:
                    if ((_nDebugLevel >= 2) && (nAltPrio < 140)) {
                        System.out.println("----Alternative: " + geo.getCity() + " / " + geo.getRegion() + " / "
                                + geo.getCountry() + " score=" + nAltPrio);
                    }

                    // Outcome of results:

                    if (nAltPrio < nPrio) {
                        currLeader = geo;
                        nPrio = nAltPrio;
                        nCase = nAltCase;
                    }
                } // end loop over alternatives

                if (null != currLeader) { // Need to change

                    if (1 == nCase) {
                        this._nMovedToLargeCity++;

                        //(Cities are lower case in georef DB for some reason)
                        String city = WordUtils.capitalize(currLeader.getCity());

                        if ("US".equals(currLeader.getCountry_code())) { // Special case: is this just the original?

                            String region = currLeader.getRegion();
                            if ("District of Columbia".equals(region)) { // Special special case
                                region = "D.C.";
                            }
                            String sCandidate = city + ", " + region;

                            if (!sCandidate.equals(ent.getDisambiguatedName())) {
                                ent.setDisambiguatedName(sCandidate);
                                ent.setIndex(ent.getDisambiguatedName() + "/city");
                                ent.setSemanticLinks(null);
                                bChangedAnything = true;
                            } //TESTED (lots, eg "Philadelphia (village), New York" -> "Philadelphia, PA"; Wash, Ill. -> Wash DC)
                            else {
                                // The "winner" is the place we already had: undo the count above
                                this._nMovedToLargeCity--;
                                _nStayedWithOriginal++;
                            } //TESTED ("Washington DC", "San Juan, Puerto Rico")
                        } //TESTED (see above)
                        else {
                            ent.setDisambiguatedName(city + ", " + currLeader.getCountry());
                            ent.setIndex(ent.getDisambiguatedName() + "/city");
                            ent.setSemanticLinks(null);
                            bChangedAnything = true;
                        } //TESTED: "london, california/city to London, United Kingdom"
                    } else if (2 == nCase) {
                        this._nMovedToRegion++;
                        ent.setDisambiguatedName(currLeader.getRegion() + ", " + currLeader.getCountry());
                        ent.setIndex(ent.getDisambiguatedName() + "/region");
                        ent.setSemanticLinks(null);
                        bChangedAnything = true;

                    } //TESTED: "Moved madrid, new york/city to Madrid, Spain" (treats Madrid as region, like Berlin see above)
                    else {
                        //(Cities are lower case in georef DB for some reason)
                        String city = WordUtils.capitalize(currLeader.getCity());

                        this._nMovedToForeignCity++;
                        ent.setDisambiguatedName(city + ", " + currLeader.getCountry());
                        ent.setIndex(ent.getDisambiguatedName() + "/city");
                        ent.setSemanticLinks(null);
                        bChangedAnything = true;

                    } //TESTED: "Moved geneva, new york/city to Geneva, Switzerland"

                    // (semantic links are nulled on every actual move, so this only fires for real changes)
                    if ((_nDebugLevel >= 1) && (null == ent.getSemanticLinks())) {
                        System.out.println("++++ Moved " + pair.getKey() + " to " + ent.getDisambiguatedName());
                    }
                } else {
                    _nStayedWithOriginal++;
                }

            } // (if sitting tenant not holder)

        } // (end loop over candidates)      

        if ((_nDebugLevel >= 1) && bChangedAnything) {
            System.out.println("\t(((Doc: " + doc.getTitle() + " / " + doc.getId() + " / " + doc.getUrl() + ")))");
        }

        return bChangedAnything;
    }

    /////////////////////////////////////////////////////////////////////////////////////////////

    // Utility: state abbreviations:

    /**
     * Expands one of the lower-case state abbreviations accepted by the abbreviation regex
     * (eg "mich." or "n.j") to the lower-case full state name.
     *
     * @param s lower-case abbreviation, with or without a single trailing "."
     * @return the lower-case state name, or the input (minus any trailing ".") if it is not recognised
     */
    private static String getStateFromAbbr(String s) {

        // Normalise "mich." and "mich" to the same key
        if (s.endsWith(".")) {
            s = s.substring(0, s.length() - 1);
        }
        if (s.equals("m.d")) {
            s = "maryland";
        } else if (s.equals("n.m")) {
            s = "new mexico";
        } else if (s.equals("conn")) {
            s = "connecticut";
        } else if (s.equals("mich")) {
            s = "michigan";
        } else if (s.equals("n.j")) {
            s = "new jersey";
        } else if (s.equals("al")) {
            s = "alabama";
        } else if (s.equals("d.c")) {
            s = "district of columbia";
        } else if (s.equals("vt")) {
            s = "vermont";
        } else if (s.equals("calif")) {
            s = "california";
        } else if (s.equals("wash")) {
            s = "washington";
        } else if (s.equals("ore")) {
            s = "oregon";
        } else if (s.equals("ind")) {
            s = "indiana";
        }
        return s;
    }
}