com.gisgraphy.importer.GeonamesZipCodeSimpleImporter.java Source code

Java tutorial

Introduction

Here is the source code for com.gisgraphy.importer.GeonamesZipCodeSimpleImporter.java

Source

/*******************************************************************************
 *   Gisgraphy Project 
 * 
 *   This library is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU Lesser General Public
 *   License as published by the Free Software Foundation; either
 *   version 2.1 of the License, or (at your option) any later version.
 * 
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *   Lesser General Public License for more details.
 * 
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
 * 
 *  Copyright 2008  Gisgraphy project 
 *  David Masclet <davidmasclet@gisgraphy.com>
 *  
 *  
 *******************************************************************************/
package com.gisgraphy.importer;

import static com.gisgraphy.domain.geoloc.entity.GisFeature.NAME_MAX_LENGTH;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.hibernate.FlushMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;

import com.gisgraphy.domain.geoloc.entity.Adm;
import com.gisgraphy.domain.geoloc.entity.City;
import com.gisgraphy.domain.geoloc.entity.GisFeature;
import com.gisgraphy.domain.geoloc.entity.ZipCode;
import com.gisgraphy.domain.repository.IAdmDao;
import com.gisgraphy.domain.repository.ICityDao;
import com.gisgraphy.domain.repository.IGisFeatureDao;
import com.gisgraphy.domain.repository.IIdGenerator;
import com.gisgraphy.domain.repository.ISolRSynchroniser;
import com.gisgraphy.domain.repository.IZipCodeDao;
import com.gisgraphy.domain.valueobject.GISSource;
import com.gisgraphy.domain.valueobject.NameValueDTO;
import com.gisgraphy.fulltext.FullTextSearchEngine;
import com.gisgraphy.fulltext.FulltextQuery;
import com.gisgraphy.fulltext.FulltextResultsDto;
import com.gisgraphy.fulltext.IFullTextSearchEngine;
import com.gisgraphy.fulltext.SolrResponseDto;
import com.gisgraphy.helper.GeolocHelper;
import com.gisgraphy.service.ServiceException;
import com.vividsolutions.jts.geom.Point;

/**
 * Import the zipcode from a Geonames dump file.
 * 
 * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
 */
public class GeonamesZipCodeSimpleImporter extends AbstractSimpleImporterProcessor {

    protected static final Logger logger = LoggerFactory.getLogger(GeonamesZipCodeSimpleImporter.class);

    protected IGisFeatureDao gisFeatureDao;

    protected IAdmDao admDao;

    protected IFullTextSearchEngine fullTextSearchEngine;

    protected ISolRSynchroniser solRSynchroniser;

    protected ICityDao cityDao;

    protected IZipCodeDao zipCodeDao;

    protected IIdGenerator IdGenerator;

    protected int[] accuracyToDistance = { 50000, 50000, 40000, 10000, 10000, 5000, 3000 };

    /*
     * (non-Javadoc)
     * 
     * @see
     * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData
     * (java.lang.String)
     */
    @Override
    protected void processData(String line) {
        String[] fields = line.split("\t");

        /*
         * line table has the following fields :
         * --------------------------------------------------- 0 country code :
         * 1 postal code 2 place name 3 admin1 name 4 admin1 code 5 admin2 name
         * 6 admin2 code2 7 admin3 name 8 admin3 code 9 latitude 10 longitude 11
         * accuracy accuracy
         * 
         * Accuracy is an integer, the higher the better : 1 : estimated as
         * average from numerically neigbouring postal codes 3 : same postal
         * code, other name 4 : place name from geonames db 6 : postal code area
         * centroid
         */

        // check that the csv file line is in a correct format
        //checkNumberOfColumn(fields);

        String code = null;
        int accuracy = 0;
        Point zipPoint = null;
        String countryCode = null;

        //check required field
        if (!isEmptyField(fields, 0, true)) {
            countryCode = fields[0];
        }

        if (!isEmptyField(fields, 1, true)) {
            code = fields[1];
        }

        //check required field
        if (!isEmptyField(fields, 2, true)) {
            //nothing to do just check
        }

        if (!isEmptyField(fields, 11, false)) {
            accuracy = new Integer(fields[11]);
        }

        // Location
        if (!isEmptyField(fields, 10, true) && !isEmptyField(fields, 9, true)) {
            zipPoint = GeolocHelper.createPoint(new Float(fields[10]), new Float(fields[9]));
        }
        City city = getByShape(countryCode, code, zipPoint);
        if (city != null) {
            return;
        }

        Long featureId = findFeature(fields, zipPoint, getAccurateDistance(accuracy));
        GisFeature gisFeature;
        if (featureId != null) {
            logger.info(dumpFields(fields) + " returns " + featureId);
            gisFeature = addAndSaveZipCodeToFeature(code, featureId);
            logger.info("Adding zip " + fields[1] + " to " + gisFeature);
        } else {
            logger.warn(dumpFields(fields) + " returns nothings ");
            gisFeature = addNewEntityAndZip(fields);
            logger.info("Adding new zip " + fields[1] + " to " + gisFeature);
        }
    }

    protected City getByShape(String countryCode, String code, Point zipPoint) {
        City cityByShape = cityDao.getByShape(zipPoint, countryCode, false);
        if (cityByShape != null) {
            ZipCode zipCode = new ZipCode(code);
            //if (feature.getZipCodes() == null || !feature.getZipCodes().contains(zipCode)) {
            cityByShape.addZipCode(zipCode);
            cityDao.save(cityByShape);
        }
        return cityByShape;
    }

    protected Long findFeature(String[] fields, Point zipPoint, int maxDistance) {

        String query;
        boolean extendedsearch;
        if (fields[3] != null) {//adm1Name
            query = fields[2] + " " + fields[3];
            extendedsearch = true;
        } else {
            query = fields[2];//name
            extendedsearch = false;
        }
        FulltextResultsDto results = doAFulltextSearch(query, fields[0]);
        if (results.getNumFound() == 0) {
            if (extendedsearch) {
                // do a basic search
                results = doAFulltextSearch(fields[2], fields[0]);
                if (results.getResultsSize() == 0) {
                    // oops, no results
                    return null;
                } else if (results.getNumFound() == 1) {
                    // we found the one!
                    return results.getResults().get(0).getFeature_id();
                } else {
                    // more than one match iterate and calculate distance and
                    // take the nearest
                    return findNearest(zipPoint, maxDistance, results);
                }
            } else {
                // no features matches in basic search!
                return null;

            }
        } else if (results.getResults().size() == 1) {
            // we found the one!
            return results.getResults().get(0).getFeature_id();
        } else {
            // more than one match, take the nearest
            return findNearest(zipPoint, maxDistance, results);
        }

    }

    protected Long findNearest(Point zipPoint, int maxDistance, FulltextResultsDto results) {
        Long nearestFeatureId = null;
        double nearestDistance = 0;
        for (SolrResponseDto dto : results.getResults()) {
            Point dtoPoint = GeolocHelper.createPoint(new Float(dto.getLng()), new Float(dto.getLat()));
            if (nearestFeatureId == null) {
                nearestFeatureId = dto.getFeature_id();
                nearestDistance = GeolocHelper.distance(zipPoint, dtoPoint);
            } else {
                double distance = GeolocHelper.distance(zipPoint, dtoPoint);
                if (distance > maxDistance) {
                    logger.info(dto.getFeature_id() + " is too far and is not candidate");
                } else {
                    if (distance < nearestDistance) {
                        logger.info(dto.getFeature_id() + "is nearest than " + nearestFeatureId);
                        nearestFeatureId = dto.getFeature_id();
                        nearestDistance = distance;
                    }
                }

            }
        }
        return nearestFeatureId;
    }

    protected int getAccurateDistance(int accuracyLevel) {
        if (accuracyLevel > accuracyToDistance.length - 1) {
            accuracyLevel = accuracyToDistance.length - 1;
        } else if (accuracyLevel < 0) {
            accuracyLevel = 0;
        }
        return accuracyToDistance[accuracyLevel];
    }

    protected GisFeature addNewEntityAndZip(String[] fields) {
        City city = new City();
        long nextFeatureId = IdGenerator.getNextFeatureId();
        city.setFeatureId(nextFeatureId);
        String name = fields[2];
        if (name.length() > NAME_MAX_LENGTH) {
            logger.warn(name + "is too long");
            name = name.substring(0, NAME_MAX_LENGTH - 1);
        }
        city.setName(name);
        // Location
        if (!isEmptyField(fields, 9, true) && !isEmptyField(fields, 10, true)) {
            city.setLocation(GeolocHelper.createPoint(new Float(fields[10]), new Float(fields[9])));
        }
        city.setFeatureClass("P");
        city.setFeatureCode("PPL");
        city.setSource(GISSource.GEONAMES_ZIP);
        city.setCountryCode(fields[0]);
        setAdmCodesWithCSVOnes(fields, city);
        Adm adm;
        if (importerConfig.isTryToDetectAdmIfNotFound()) {
            adm = this.admDao.suggestMostAccurateAdm(fields[0], fields[4], fields[6], fields[8], null, city);
            logger.info("suggestAdm=" + adm);
        } else {
            adm = this.admDao.getAdm(fields[0], fields[4], fields[6], fields[8], null);
        }

        city.setAdm(adm);
        setAdmCodesWithLinkedAdmOnes(adm, city, importerConfig.isSyncAdmCodesWithLinkedAdmOnes());
        setAdmNames(adm, city);
        city.addZipCode(new ZipCode(fields[1]));

        cityDao.save(city);
        //we do not return the saved entity for test purpose
        return city;
    }

    protected GisFeature addAndSaveZipCodeToFeature(String code, Long featureId) {
        GisFeature feature = gisFeatureDao.getByFeatureId(featureId);
        if (feature == null) {
            logger.error(
                    "can not add zip code " + code + " to " + featureId + ", because the feature doesn't exists");
            return null;
        }
        ZipCode zipCode = new ZipCode(code);
        //if (feature.getZipCodes() == null || !feature.getZipCodes().contains(zipCode)) {
        feature.addZipCode(zipCode);
        return gisFeatureDao.save(feature);
        //} else {
        //  logger.warn("the zipcode " + code + " already exists for feature " + featureId);
        //return feature;
        //}
    }

    protected FulltextResultsDto doAFulltextSearch(String query, String countryCode) {
        FulltextQuery fulltextQuery;
        try {
            fulltextQuery = new FulltextQuery(query);
        } catch (IllegalArgumentException e) {
            logger.error("can not create a fulltext query for " + query);
            return new FulltextResultsDto();
        }
        fulltextQuery.limitToCountryCode(countryCode);
        fulltextQuery.withPlaceTypes(com.gisgraphy.fulltext.Constants.CITY_AND_CITYSUBDIVISION_PLACETYPE);

        FulltextResultsDto results;
        try {
            results = fullTextSearchEngine.executeQuery(fulltextQuery);
        } catch (ServiceException e) {
            logger.error("error when executing a fulltext search " + e.getMessage(), e);
            return new FulltextResultsDto();
        }
        return results;
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped
     * ()
     */
    @Override
    public boolean shouldBeSkipped() {
        return !importerConfig.isGeonamesImporterEnabled();
    }

    private void setAdmNames(Adm adm, GisFeature gisFeature) {
        if (adm == null) {
            return;
        }
        Adm admTemp = adm;
        do {
            if (admTemp.getLevel() == 1) {
                gisFeature.setAdm1Name(admTemp.getName());
            } else if (admTemp.getLevel() == 2) {
                gisFeature.setAdm2Name(admTemp.getName());
            } else if (admTemp.getLevel() == 3) {
                gisFeature.setAdm3Name(admTemp.getName());
            } else if (admTemp.getLevel() == 4) {
                gisFeature.setAdm4Name(admTemp.getName());
            }
            admTemp = admTemp.getParent();
        } while (admTemp != null);

    }

    private void setAdmCodesWithLinkedAdmOnes(Adm adm, GisFeature gisFeature,
            boolean syncAdmCodesWithLinkedAdmOnes) {

        if (syncAdmCodesWithLinkedAdmOnes) {
            // reset adm code because we might link to an adm3 and adm4 code
            // have
            // been set
            setAdmCodesToNull(gisFeature);
            if (adm != null) {
                if (adm.getAdm1Code() != null) {
                    gisFeature.setAdm1Code(adm.getAdm1Code());
                }
                if (adm.getAdm2Code() != null) {
                    gisFeature.setAdm2Code(adm.getAdm2Code());
                }
                if (adm.getAdm3Code() != null) {
                    gisFeature.setAdm3Code(adm.getAdm3Code());
                }
                if (adm.getAdm4Code() != null) {
                    gisFeature.setAdm4Code(adm.getAdm4Code());
                }
            }

        }
    }

    private void setAdmCodesToNull(GisFeature gisFeature) {
        gisFeature.setAdm1Code(null);
        gisFeature.setAdm2Code(null);
        gisFeature.setAdm3Code(null);
        gisFeature.setAdm4Code(null);
    }

    private void setAdmCodesWithCSVOnes(String[] fields, GisFeature gisFeature) {
        logger.debug("in setAdmCodesWithCSVOnes");
        if (!isEmptyField(fields, 4, false)) {
            gisFeature.setAdm1Code(fields[4]);
        }
        if (!isEmptyField(fields, 6, false)) {
            gisFeature.setAdm2Code(fields[6]);
        }
        if (!isEmptyField(fields, 8, false)) {
            gisFeature.setAdm3Code(fields[8]);
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#
     * shouldIgnoreFirstLine()
     */
    @Override
    protected boolean shouldIgnoreFirstLine() {
        return false;
    }

    /*
     * (non-Javadoc)
     * 
     * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#
     * shouldIgnoreComments()
     */
    @Override
    protected boolean shouldIgnoreComments() {
        return true;
    }

    /*
     * (non-Javadoc)
     * 
     * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#
     * setCommitFlushMode()
     */
    @Override
    protected void setCommitFlushMode() {
        this.gisFeatureDao.setFlushMode(FlushMode.COMMIT);
        this.cityDao.setFlushMode(FlushMode.COMMIT);
        this.admDao.setFlushMode(FlushMode.COMMIT);
        this.zipCodeDao.setFlushMode(FlushMode.COMMIT);
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear
     * ()
     */
    @Override
    protected void flushAndClear() {
        this.gisFeatureDao.flushAndClear();
        this.cityDao.flushAndClear();
        this.admDao.flushAndClear();
        this.zipCodeDao.flushAndClear();
    }

    /*
     * (non-Javadoc)
     * 
     * @seecom.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#
     * getNumberOfColumns()
     */
    @Override
    protected int getNumberOfColumns() {
        return 12;
    }

    /**
     * @param cityDao
     *            The CityDao to set
     */
    @Required
    public void setCityDao(ICityDao cityDao) {
        this.cityDao = cityDao;
    }

    /**
     * @param gisFeatureDao
     *            The GisFeatureDao to set
     */
    @Required
    public void setGisFeatureDao(IGisFeatureDao gisFeatureDao) {
        this.gisFeatureDao = gisFeatureDao;
    }

    /**
     * @param admDao
     *            the admDao to set
     */
    @Required
    public void setAdmDao(IAdmDao admDao) {
        this.admDao = admDao;
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setup()
     */
    @Override
    protected void setup() {
        super.setup();
        cityDao.createGISTIndexForShapeColumn();
        FullTextSearchEngine.disableLogging = true;
        IdGenerator.sync();
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#tearDown()
     */
    @Override
    protected void tearDown() {
        String savedMessage = this.statusMessage;
        FullTextSearchEngine.disableLogging = false;
        this.statusMessage = internationalisationService.getString("import.teardown");
        try {
            super.tearDown();
            if (!solRSynchroniser.commit()) {
                logger.warn("The commit in tearDown of " + this.getClass().getSimpleName()
                        + " has failed, the uncommitted changes will be commited with the auto commit of solr in few minuts");
            }
            solRSynchroniser.optimize();
        } finally {
            this.statusMessage = savedMessage;
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
     */
    @Override
    protected File[] getFiles() {
        return ImporterHelper.listCountryFilesToImport(importerConfig.getGeonamesZipCodeDir());
    }

    /**
     * @param solRSynchroniser
     *            the solRSynchroniser to set
     */
    @Required
    public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
        this.solRSynchroniser = solRSynchroniser;
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
     */
    public List<NameValueDTO<Integer>> rollback() {
        List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
        // we first reset subClass
        int deletedgis = zipCodeDao.deleteAll();
        logger.warn("deleting zipCodes...");
        // we don't want to remove adm because some feature can be linked again
        if (deletedgis != 0) {
            deletedObjectInfo.add(new NameValueDTO<Integer>(GisFeature.class.getSimpleName(), deletedgis));
        }
        resetStatus();
        return deletedObjectInfo;
    }

    @Required
    public void setZipCodeDao(IZipCodeDao zipCodeDao) {
        this.zipCodeDao = zipCodeDao;
    }

    @Required
    public void setFullTextSearchEngine(IFullTextSearchEngine fullTextSearchEngine) {
        this.fullTextSearchEngine = fullTextSearchEngine;
    }

    @Required
    public void setIdGenerator(IIdGenerator idGenerator) {
        IdGenerator = idGenerator;
    }

}