com.gisgraphy.importer.OpenStreetMapPoisSimpleImporter.java Source code

Java tutorial

Introduction

Here is the source code for com.gisgraphy.importer.OpenStreetMapPoisSimpleImporter.java

Source

/*******************************************************************************
 *   Gisgraphy Project 
 * 
 *   This library is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU Lesser General Public
 *   License as published by the Free Software Foundation; either
 *   version 2.1 of the License, or (at your option) any later version.
 * 
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *   Lesser General Public License for more details.
 * 
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
 * 
 *  Copyright 2008  Gisgraphy project 
 *  David Masclet <davidmasclet@gisgraphy.com>
 *  
 *  
 *******************************************************************************/
package com.gisgraphy.importer;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.hibernate.FlushMode;
import org.hibernate.exception.ConstraintViolationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Required;

import com.gisgraphy.domain.geoloc.entity.AlternateName;
import com.gisgraphy.domain.geoloc.entity.City;
import com.gisgraphy.domain.geoloc.entity.GisFeature;
import com.gisgraphy.domain.geoloc.entity.PostOffice;
import com.gisgraphy.domain.geoloc.entity.ZipCode;
import com.gisgraphy.domain.repository.ICityDao;
import com.gisgraphy.domain.repository.IGisFeatureDao;
import com.gisgraphy.domain.repository.IIdGenerator;
import com.gisgraphy.domain.repository.ISolRSynchroniser;
import com.gisgraphy.domain.valueobject.AlternateNameSource;
import com.gisgraphy.domain.valueobject.GISSource;
import com.gisgraphy.domain.valueobject.NameValueDTO;
import com.gisgraphy.domain.valueobject.Output;
import com.gisgraphy.domain.valueobject.Output.OutputStyle;
import com.gisgraphy.fulltext.FullTextSearchEngine;
import com.gisgraphy.helper.GeolocHelper;
import com.gisgraphy.helper.StringHelper;
import com.vividsolutions.jts.geom.Point;

/**
 * Imports the POIs from a (pre-processed) OpenStreetMap data file.
 * The goal of this importer is to cross-reference information between Geonames and OpenStreetMap.
 * 
 * 
 * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
 */
public class OpenStreetMapPoisSimpleImporter extends AbstractSimpleImporterProcessor {

    /**
     * Value passed as last argument to {@code cityDao.getNearest(...)} by {@code getNearestCity}.
     * NOTE(review): the unit is not visible here, presumably meters - confirm against the DAO.
     */
    public static final int DISTANCE = 40000;

    /** Logger of this importer. */
    protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapPoisSimpleImporter.class);

    /** Output with the default format and the {@code SHORT} style. */
    public static final Output MINIMUM_OUTPUT_STYLE = Output.withDefaultFormat().withStyle(OutputStyle.SHORT);

    /**
     * Matches a word followed by one whitespace character and at least one digit
     * (case insensitive, Unicode aware). Group 1 is the word; used by {@code pplxToPPL}.
     */
    private static final Pattern pattern = Pattern.compile("(\\w+)\\s\\d+.*",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    /**
     * Regexp whose group 1 captures a non-empty run of characters that does not contain
     * "___", followed either by "___" or by the end of the input.
     */
    public static final String ALTERNATENAMES_EXTRACTION_REGEXP = "((?:(?!___).)+)(?:(?:___)|(?:$))";

    /** Compiled form of {@link #ALTERNATENAMES_EXTRACTION_REGEXP}. */
    public static final Pattern ALTERNATENAMES_EXTRACTION_PATTERN = Pattern
            .compile(ALTERNATENAMES_EXTRACTION_REGEXP);

    /** Synchronised in {@code setup()} and used to assign a feature id to every created POI. */
    @Autowired
    protected IIdGenerator idGenerator;

    /** DAO used to save the POIs, to flush/clear and to set the flush mode. */
    @Autowired
    protected IGisFeatureDao gisFeatureDao;

    /** Its {@code optimize()} method is called in {@code tearDown()}. */
    @Autowired
    protected ISolRSynchroniser solRSynchroniser;

    /** Turns the split amenity tags into the list of POIs to populate (see {@code createAndpopulatePoi}). */
    OsmAmenityToPlacetype osmAmenityToPlacetype = new OsmAmenityToPlacetype();

    /** DAO used to find the city containing, or nearest to, a POI (see {@code setIsInFields}). */
    @Autowired
    protected ICityDao cityDao;

    /**
     * Tells whether the is_in related fields of the POIs should be filled.
     *
     * @return {@code true} only when the importer configuration has both the geonames
     *         importer and the "OpenStreetMap fill is_in" option enabled
     */
    protected boolean shouldFillIsInField() {
        if (!importerConfig.isGeonamesImporterEnabled()) {
            return false;
        }
        return importerConfig.isOpenStreetMapFillIsIn();
    }

    /**
     * Delegates to {@code gisFeatureDao.flushAndClear()}.
     */
    @Override
    protected void flushAndClear() {
        gisFeatureDao.flushAndClear();
    }

    /**
     * Runs the parent setup, then disables the fulltext engine logging and
     * synchronises the id generator.
     */
    @Override
    protected void setup() {
        super.setup();
        // temporarily disable logging while importing
        FullTextSearchEngine.disableLogging = true;
        idGenerator.sync();
    }

    /**
     * Lists the files to import.
     *
     * @return the result of {@code ImporterHelper.listCountryFilesToImport} for the
     *         OpenStreetMap POIs directory of the importer configuration
     */
    @Override
    protected File[] getFiles() {
        return ImporterHelper.listCountryFilesToImport(importerConfig.getOpenStreetMapPoisDir());
    }

    /**
     * Gives the number of columns of a line of the import file.
     *
     * @return always 7 (node type, id, name, country code, alternate names, location
     *         and amenity, as listed in {@code processData})
     */
    @Override
    protected int getNumberOfColumns() {
        return 7;
    }

    /**
     * Imports one line of the file: splits it on tabs, builds the POIs it describes
     * and saves each of them with the GisFeature DAO.
     * <p>
     * Columns of a line :<br>
     * 0 : Node type; 1 : id; 2 : name; 3 : countrycode; 4 : alternatenames;
     * 5 : location; 6 : amenity
     * <p>
     * Nothing is saved when {@code createAndpopulatePoi} returns {@code null}.
     * Exceptions raised while saving are logged and the import goes on.
     *
     * @param line the raw, tab separated, line to import
     * @throws ImporterException declared by the signature (NOTE(review): presumably
     *         raised by {@code checkNumberOfColumn} - confirm)
     */
    @Override
    protected void processData(String line) throws ImporterException {
        final String[] fields = line.split("\t");
        checkNumberOfColumn(fields);

        // the amenity stays null when the 7th column is empty
        final String amenity = isEmptyField(fields, 6, true) ? null : fields[6].trim();

        final List<GisFeature> createdPois = createAndpopulatePoi(fields, amenity);
        if (createdPois != null) {
            try {
                for (GisFeature createdPoi : createdPois) {
                    gisFeatureDao.save(createdPoi);
                }
            } catch (ConstraintViolationException constraintViolation) {
                logger.error("Can not save " + dumpFields(fields)
                        + "(ConstraintViolationException) we continue anyway but you should consider this",
                        constraintViolation);
            } catch (Exception failure) {
                logger.error(
                        "Can not save " + dumpFields(fields) + " we continue anyway but you should consider this",
                        failure);
            }
        }
    }

    /**
     * Creates the POIs matching the amenity tags of a line and populates each of them
     * from the columns of that line (1 : id; 2 : name; 3 : countrycode;
     * 4 : alternatenames; 5 : location).
     * <p>
     * The is_in fields are filled <em>after</em> the location has been set, because
     * {@code setIsInFields} does nothing for a POI without location.
     *
     * @param fields  the columns of the line
     * @param amenity the (trimmed) content of the amenity column, "___" separated
     * @return the populated POIs, or {@code null} when the line can not be imported
     *         (unparsable id, too long name, missing or unparsable location,
     *         or no list returned for the tags)
     */
    List<GisFeature> createAndpopulatePoi(String[] fields, String amenity) {
        String[] tags = splitTags(amenity);
        List<GisFeature> pois = osmAmenityToPlacetype.getObjectsFromTags(tags);
        if (pois == null) {
            // defensive : the contract of getObjectsFromTags is not visible from here
            return null;
        }
        for (GisFeature poi : pois) {
            poi.setSource(GISSource.OSM);
            //osmId
            if (!isEmptyField(fields, 1, true)) {
                String osmIdAsString = fields[1].trim();
                try {
                    poi.setOpenstreetmapId(Long.parseLong(osmIdAsString));
                } catch (NumberFormatException e) {
                    logger.error("can not parse openstreetmap id " + osmIdAsString);
                    return null;
                }
            }

            // name
            String name = isEmptyField(fields, 2, false) ? null : fields[2].trim();
            if (name != null && name.length() >= GisFeature.NAME_MAX_LENGTH) {
                logger.warn(name + " is a too long");
                return null;
            }
            if (name == null || "".equals(name) || "\"\"".equals(name)) {
                //set a default name (and keep it : it must not be overridden by the empty one)
                // NOTE(review): the default name is derived from PostOffice whatever the
                // type of the POI is - confirm this is intended
                poi.setName(StringHelper.splitCamelCase(PostOffice.class.getSimpleName()).toLowerCase());
            } else {
                poi.setName(name);
            }

            //countrycode
            if (!isEmptyField(fields, 3, true)) {
                String countryCode = fields[3].trim().toUpperCase();
                poi.setCountryCode(countryCode);
            }

            //populate alternatenames
            if (!isEmptyField(fields, 4, false)) {
                String alternateNamesAsString = fields[4].trim();
                populateAlternateNames(poi, alternateNamesAsString);
            }

            //location
            if (isEmptyField(fields, 5, false)) {
                return null;
            }
            try {
                Point location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[5]);
                poi.setLocation(location);
            } catch (RuntimeException e) {
                // log the amenity parameter rather than fields[6], which may be out of bounds
                logger.warn("can not parse location for " + amenity + " : " + e);
                return null;
            }

            if (shouldFillIsInField()) {
                //we try to process is_in fields, because we want to fill adm and zip too.
                //It needs the location and the country code, so it is done once they are set
                setIsInFields(poi);
            }

            //featureId
            poi.setFeatureId(idGenerator.getNextFeatureId());
        }
        return pois;
    }

    /**
     * Splits the amenity column on "___" into a fixed size array of 14 tags.
     * <p>
     * A slot stays {@code null} when its value is missing or blank. A {@code null}
     * amenity gives an array with only {@code null} slots, and values beyond the
     * 14th are ignored instead of overflowing the array.
     *
     * @param amenity the "___" separated tags, may be {@code null}
     * @return an array of exactly 14 elements, never {@code null}
     */
    protected String[] splitTags(String amenity) {
        String[] tags = new String[14];
        if (amenity == null) {
            return tags;
        }
        String[] tagsvalues = amenity.split("___");
        // never write past the end of the fixed size array
        int limit = Math.min(tagsvalues.length, tags.length);
        for (int j = 0; j < limit; j++) {
            if (!"".equals(tagsvalues[j].trim())) {
                tags[j] = tagsvalues[j];
            }
        }
        return tags;
    }

    /**
     * Fills the is_in, is_in_place, is_in_adm, zip codes, population and cities
     * alternate names of a POI from the cities around it. Does nothing when the POI
     * or its location is {@code null}.
     * <ol>
     * <li>the city whose shape contains the POI is used when there is one, and the
     * method stops there;</li>
     * <li>otherwise the nearest city found with the municipality filter is used;</li>
     * <li>then the nearest city found without that filter completes (or replaces the
     * missing) information, unless it is the same city or is farther than the one of
     * step 2.</li>
     * </ol>
     *
     * @param poi the POI to fill, its location and country code have to be set before
     */
    protected void setIsInFields(GisFeature poi) {
        if (poi == null || poi.getLocation() == null) {
            return;
        }
        //first searchByShape because it is the more reliable :
        City cityByShape = cityDao.getByShape(poi.getLocation(), poi.getCountryCode(), true);
        if (cityByShape != null) {
            poi.setIsIn(cityByShape.getName());
            poi.setPopulation(cityByShape.getPopulation());
            mergeZipCodesInto(poi, cityByShape);
            mergeCityAlternateNamesInto(poi, cityByShape);
            if (cityByShape.getAdm() != null) {
                poi.setIsInAdm(cityByShape.getAdm().getName());
            }
            return;
        }

        City city = getNearestCity(poi.getLocation(), poi.getCountryCode(), true);
        if (city != null) {
            poi.setPopulation(city.getPopulation());
            poi.setIsInAdm(getDeeperAdmName(city));
            mergeZipCodesInto(poi, city);
            if (city.getName() != null && poi.getIsIn() == null) {//only if it has not be set by the openstreetmap is_in field
                //we can here have some concordance problem if the city found is not the one populate in the osm is_in fields.
                poi.setIsIn(pplxToPPL(city.getName()));
            }
            mergeCityAlternateNamesInto(poi, city);
        }

        City city2 = getNearestCity(poi.getLocation(), poi.getCountryCode(), false);
        if (city2 == null) {
            return;
        }
        if (city != null) {
            if (haveSameFeatureId(city, city2)) {
                return;
            }
            if (city2.getLocation() != null && city.getLocation() != null
                    && GeolocHelper.distance(poi.getLocation(), city2.getLocation()) > GeolocHelper
                            .distance(poi.getLocation(), city.getLocation())) {
                return;
            }
        }
        //we got a non municipality that is nearest, we set isinPlace tag and update is_in if needed
        if (city2.getPopulation() != null && city2.getPopulation() != 0
                && (poi.getPopulation() == null || poi.getPopulation() == 0)) {
            poi.setPopulation(city2.getPopulation());
        }

        if (poi.getIsIn() == null) {
            poi.setIsIn(pplxToPPL(city2.getName()));
        } else {
            poi.setIsInPlace(pplxToPPL(city2.getName()));
        }
        if (poi.getIsInAdm() == null) {
            poi.setIsInAdm(getDeeperAdmName(city2));
        }
        //we merge the zipcodes for is_in and is_in_place, so we don't check
        //if zipcodes are already filled
        mergeZipCodesInto(poi, city2);
        if (city == null) {//add AN only if there are not added yet
            mergeCityAlternateNamesInto(poi, city2);
        }
    }

    /** Adds every non null zip code of the city to the POI. */
    private static void mergeZipCodesInto(GisFeature poi, City city) {
        if (city.getZipCodes() == null) {
            return;
        }
        for (ZipCode zip : city.getZipCodes()) {
            if (zip != null && zip.getCode() != null) {
                poi.addZip(zip.getCode());
            }
        }
    }

    /** Adds every non null alternate name of the city to the is_in cities alternate names of the POI. */
    private static void mergeCityAlternateNamesInto(GisFeature poi, City city) {
        if (city.getAlternateNames() == null) {
            return;
        }
        for (AlternateName name : city.getAlternateNames()) {
            if (name != null && name.getName() != null) {
                poi.addIsInCitiesAlternateName(name.getName());
            }
        }
    }

    /** Compares the feature ids by value ({@code ==} would compare references if they are boxed). */
    private static boolean haveSameFeatureId(City first, City second) {
        Long firstId = first.getFeatureId();
        Long secondId = second.getFeatureId();
        return firstId == null ? secondId == null : firstId.equals(secondId);
    }

    /**
     * Tests if the city looks like a district (a word followed by a number, e.g. a
     * Paris district); if so it is probably a pplx that is newly considered as ppl, see
     * http://forum.geonames.org/gforum/posts/list/2063.page
     * <p>
     * Only the single word that precedes the number is kept.
     *
     * @param cityName the name of the city, may be {@code null}
     * @return the word before the number when the name matches, the name unchanged
     *         otherwise ({@code null} for a {@code null} name)
     */
    protected String pplxToPPL(String cityName) {
        if (cityName == null) {
            return null;
        }
        Matcher districtMatcher = pattern.matcher(cityName);
        return districtMatcher.find() ? districtMatcher.group(1) : cityName;
    }

    /**
     * Looks for the nearest city around a location, with {@link #DISTANCE} as last
     * argument of {@code cityDao.getNearest}.
     *
     * @param location           the point to search around, may be {@code null}
     * @param countryCode        the country code given to the DAO
     * @param filterMunicipality the municipality filter flag given to the DAO
     * @return what the DAO returns, or {@code null} when the location is {@code null}
     */
    protected City getNearestCity(Point location, String countryCode, boolean filterMunicipality) {
        return location == null ? null
                : cityDao.getNearest(location, countryCode, filterMunicipality, DISTANCE);
    }

    /**
     * Gives the name of the deepest administrative level known for a city :
     * adm5 is looked at first, then adm4, adm3, adm2 and finally adm1.
     *
     * @param city the city, may be {@code null}
     * @return the first non null adm name, or {@code null} when the city is
     *         {@code null} or has no adm name at all
     */
    protected String getDeeperAdmName(City city) {
        if (city == null) {
            return null;
        }
        String admName = city.getAdm5Name();
        if (admName != null) {
            return admName;
        }
        admName = city.getAdm4Name();
        if (admName != null) {
            return admName;
        }
        admName = city.getAdm3Name();
        if (admName != null) {
            return admName;
        }
        admName = city.getAdm2Name();
        if (admName != null) {
            return admName;
        }
        // adm1 is the last candidate : null is returned when it is missing too
        return city.getAdm1Name();
    }

    /**
     * Adds to the POI one alternate name (with the OPENSTREETMAP source) for every non
     * blank value found in the "___" separated string.
     * <p>
     * The extraction pattern has exactly one capturing group, so there is no need to
     * check the group count of each match.
     *
     * @param poi                    the POI to populate, may be {@code null}
     * @param alternateNamesAsString the "___" separated alternate names, may be {@code null}
     * @return the given POI (unchanged when one of the parameters is {@code null})
     */
    GisFeature populateAlternateNames(GisFeature poi, String alternateNamesAsString) {
        if (poi == null || alternateNamesAsString == null) {
            return poi;
        }
        Matcher matcher = ALTERNATENAMES_EXTRACTION_PATTERN.matcher(alternateNamesAsString);
        while (matcher.find()) {
            String alternateName = matcher.group(1);
            if (alternateName != null && !"".equals(alternateName.trim())) {
                poi.addAlternateName(new AlternateName(alternateName, AlternateNameSource.OPENSTREETMAP));
            }
        }
        return poi;
    }

    /**
     * Tells whether this importer has to be skipped.
     *
     * @return {@code true} when the OpenStreetMap importer is not enabled in the
     *         importer configuration
     */
    @Override
    public boolean shouldBeSkipped() {
        return !importerConfig.isOpenstreetmapImporterEnabled();
    }

    /**
     * Sets the flush mode of the GisFeature DAO to {@code FlushMode.COMMIT}.
     */
    @Override
    protected void setCommitFlushMode() {
        this.gisFeatureDao.setFlushMode(FlushMode.COMMIT);
    }

    /**
     * Tells whether the comments of the imported files have to be ignored.
     *
     * @return always {@code true}
     */
    @Override
    protected boolean shouldIgnoreComments() {
        return true;
    }

    /**
     * Tells whether the first line of the imported files has to be ignored.
     *
     * @return always {@code false}
     */
    @Override
    protected boolean shouldIgnoreFirstLine() {
        return false;
    }

    /**
     * Resets the status of this importer. Nothing is deleted for now : the returned
     * report always holds one entry, named after the City class, with a count of 0.
     *
     * @return the report of the deleted objects
     */
    public List<NameValueDTO<Integer>> rollback() {
        logger.info("reseting openstreetmap cities...");
        //TODO only POI that have source openstreetmap
        NameValueDTO<Integer> nothingDeleted = new NameValueDTO<Integer>(City.class.getSimpleName(), 0);
        List<NameValueDTO<Integer>> report = new ArrayList<NameValueDTO<Integer>>();
        report.add(nothingDeleted);
        resetStatus();
        return report;
    }

    /**
     * Runs the parent tear down, then calls {@code solRSynchroniser.optimize()}.
     * While it runs, the status message is the "import.fulltext.optimize" one; the
     * previous message is always restored, even when the optimization fails.
     */
    @Override
    //TODO test
    protected void tearDown() {
        super.tearDown();
        final String previousStatusMessage = this.statusMessage;
        try {
            // NOTE(review): logging is set to disabled here too and is never re-enabled
            // in this class - confirm that it is restored somewhere else
            FullTextSearchEngine.disableLogging = true;
            String optimizeMessage = internationalisationService.getString("import.fulltext.optimize");
            this.statusMessage = optimizeMessage;
            solRSynchroniser.optimize();
        } finally {
            // the message is restored whatever happens
            this.statusMessage = previousStatusMessage;
        }
    }

    /**
     * Sets the synchroniser whose {@code optimize()} method is called in {@code tearDown()}.
     *
     * @param solRSynchroniser the synchroniser to use
     */
    @Required
    public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
        this.solRSynchroniser = solRSynchroniser;
    }

    /**
     * Sets the generator that gives the feature id of the created POIs.
     *
     * @param idGenerator the id generator to use
     */
    @Required
    public void setIdGenerator(IIdGenerator idGenerator) {
        this.idGenerator = idGenerator;
    }

    /**
     * Sets the DAO used to save the POIs.
     * NOTE(review): unlike the other setters, this one is not annotated with
     * {@code @Required} - confirm whether this is intended.
     *
     * @param gisFeatureDao the DAO to use
     */
    public void setGisFeatureDao(IGisFeatureDao gisFeatureDao) {
        this.gisFeatureDao = gisFeatureDao;
    }

    /**
     * Sets the DAO used to find the cities around a POI.
     *
     * @param cityDao the DAO to use
     */
    @Required
    public void setCityDao(ICityDao cityDao) {
        this.cityDao = cityDao;
    }

}