com.gisgraphy.importer.OpenStreetMapSimpleImporter.java Source code

Java tutorial

Introduction

Here is the source code for com.gisgraphy.importer.OpenStreetMapSimpleImporter.java

Source

/*******************************************************************************
 *   Gisgraphy Project 
 * 
 *   This library is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU Lesser General Public
 *   License as published by the Free Software Foundation; either
 *   version 2.1 of the License, or (at your option) any later version.
 * 
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *   Lesser General Public License for more details.
 * 
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
 * 
 *  Copyright 2008  Gisgraphy project 
 *  David Masclet <davidmasclet@gisgraphy.com>
 *  
 *  
 *******************************************************************************/
package com.gisgraphy.importer;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.hibernate.FlushMode;
import org.hibernate.exception.ConstraintViolationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Required;

import com.gisgraphy.domain.geoloc.entity.AlternateName;
import com.gisgraphy.domain.geoloc.entity.AlternateOsmName;
import com.gisgraphy.domain.geoloc.entity.City;
import com.gisgraphy.domain.geoloc.entity.CitySubdivision;
import com.gisgraphy.domain.geoloc.entity.OpenStreetMap;
import com.gisgraphy.domain.geoloc.entity.ZipCode;
import com.gisgraphy.domain.repository.ICityDao;
import com.gisgraphy.domain.repository.ICitySubdivisionDao;
import com.gisgraphy.domain.repository.IIdGenerator;
import com.gisgraphy.domain.repository.IOpenStreetMapDao;
import com.gisgraphy.domain.repository.ISolRSynchroniser;
import com.gisgraphy.domain.valueobject.AlternateNameSource;
import com.gisgraphy.domain.valueobject.NameValueDTO;
import com.gisgraphy.fulltext.FullTextSearchEngine;
import com.gisgraphy.geoloc.GeolocSearchEngine;
import com.gisgraphy.helper.GeolocHelper;
import com.gisgraphy.helper.StringHelper;
import com.gisgraphy.street.StreetType;
import com.vividsolutions.jts.geom.LineString;
import com.vividsolutions.jts.geom.Point;

/**
 * Imports streets from a (pre-processed) OpenStreetMap data file.
 * 
 * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
 */
public class OpenStreetMapSimpleImporter extends AbstractSimpleImporterProcessor {

    /** Logger shared by this importer. */
    protected static final Logger logger = LoggerFactory.getLogger(OpenStreetMapSimpleImporter.class);

    /**
     * Distance handed to {@code cityDao.getNearest(...)} by {@code getNearestCity(...)}.
     * NOTE(review): the unit is presumably meters - confirm against the DAO.
     */
    public static final int DISTANCE = 40000;

    /** Provides the generated id (gid) given to each street; synchronised in {@code setup()}. */
    @Autowired
    protected IIdGenerator idGenerator;

    /** DAO used to save and delete streets, flush the session and create the spatial indexes. */
    @Autowired
    protected IOpenStreetMapDao openStreetMapDao;

    /** Synchroniser whose {@code optimize()} is called in {@code tearDown()}. */
    @Autowired
    protected ISolRSynchroniser solRSynchroniser;

    /**
     * Not referenced by the methods of this class.
     * NOTE(review): presumably used by subclasses - confirm.
     */
    @Autowired
    protected IMunicipalityDetector municipalityDetector;

    /** DAO used to find a city by shape or by proximity when filling the is_in fields. */
    @Autowired
    protected ICityDao cityDao;

    /** DAO used to find the city subdivision whose shape contains a street location. */
    @Autowired
    protected ICitySubdivisionDao citySubdivisionDao;

    /**
     * Matches a word followed by one whitespace character and at least one digit
     * (e.g. "Paris 10 ..."); group 1 is the word. Used by {@code pplxToPPL(String)}.
     */
    private static final Pattern pattern = Pattern.compile("(\\w+)\\s\\d+.*",
            Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);

    /**
     * Regular expression capturing, in group 1, each run of characters that ends at the next
     * "___" separator or at the end of the string.
     */
    public static final String ALTERNATENAMES_EXTRACTION_REGEXP = "((?:(?!___).)+)(?:(?:___)|(?:$))";

    /** Compiled form of {@link #ALTERNATENAMES_EXTRACTION_REGEXP}. */
    public static final Pattern ALTERNATENAMES_EXTRACTION_PATTERN = Pattern
            .compile(ALTERNATENAMES_EXTRACTION_REGEXP);

    /**
     * Not referenced by the methods of this class.
     * NOTE(review): presumably a distance in meters used elsewhere - confirm.
     */
    public static final Float SUBURB_MAX_DISTANCE = 5000f;

    /**
     * Delegates to the street DAO's {@code flushAndClear()}.
     *
     * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#flushAndClear()
     */
    @Override
    protected void flushAndClear() {
        this.openStreetMapDao.flushAndClear();
    }

    /**
     * Runs the parent setup, switches off the logging flags of the full-text and geoloc
     * search engines for the duration of the import, then synchronises the id generator.
     */
    @Override
    protected void setup() {
        super.setup();
        // the two flags below are switched back off in tearDown()
        FullTextSearchEngine.disableLogging = true;
        GeolocSearchEngine.disableLogging = true;
        logger.info("reseting Openstreetmap generatedId");
        this.idGenerator.sync();
    }

    /**
     * Lists the files to import from the configured OpenStreetMap directory.
     *
     * @return whatever {@code ImporterHelper.listCountryFilesToImport} returns for that directory
     * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getFiles()
     */
    @Override
    protected File[] getFiles() {
        final File[] filesToImport = ImporterHelper
                .listCountryFilesToImport(this.importerConfig.getOpenStreetMapDir());
        return filesToImport;
    }

    /**
     * Gives the nominal number of columns of an import line.
     *
     * @return always 10
     * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#getNumberOfColumns()
     */
    @Override
    protected int getNumberOfColumns() {
        final int nominalColumnCount = 10;
        return nominalColumnCount;
    }

    /**
     * Parses one tab-separated line of the pre-processed OpenStreetMap file, fills an
     * {@link OpenStreetMap} street from it and saves the street through the DAO.
     * <p>
     * Columns are read as follows: 0 openstreetmap id; 1 name; 2 location (HEXEWKB point);
     * 3 length; 4 country code; 5 stored as the street's is_in value (older descriptions of
     * the format label this column "gid"); 6 street type; 7 one way; 8 shape (HEXEWKB line
     * string); 9 alternate names (optional column).
     * <p>
     * A line whose location or shape can not be parsed is skipped before a generated id is
     * requested. A non-numeric length is not caught here and propagates as a
     * {@link NumberFormatException}. Failures while saving are logged and do not stop the
     * import.
     *
     * @param line the raw line to import
     * @throws ImporterException declared by the overridden method
     * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#processData(java.lang.String)
     */
    @Override
    protected void processData(String line) throws ImporterException {
        String[] fields = line.split("\t");
        checkNumberOfColumn(fields);
        OpenStreetMap street = new OpenStreetMap();

        // openstreetmap id : an unparsable value is logged and the id is set to null
        if (!isEmptyField(fields, 0, false)) {
            Long openstreetmapId = null;
            try {
                openstreetmapId = Long.valueOf(fields[0].trim());
            } catch (NumberFormatException e) {
                logger.warn("can not get openstreetmap id for " + fields[0]);
            }
            street.setOpenstreetmapId(openstreetmapId);
        }

        // name
        if (!isEmptyField(fields, 1, false)) {
            street.setName(fields[1].trim());
            StringHelper.updateOpenStreetMapEntityForIndexation(street);
        }

        // location : the line is dropped when the point can not be decoded
        if (!isEmptyField(fields, 2, false)) {
            try {
                Point location = (Point) GeolocHelper.convertFromHEXEWKBToGeometry(fields[2]);
                street.setLocation(location);
            } catch (RuntimeException e) {
                logger.warn("can not parse location for " + fields[1] + " : " + e);
                return;
            }
        }

        // length
        if (!isEmptyField(fields, 3, false)) {
            street.setLength(Double.valueOf(fields[3].trim()));
        }

        // shape : the line is dropped when the line string can not be decoded
        if (!isEmptyField(fields, 8, true)) {
            try {
                street.setShape((LineString) GeolocHelper.convertFromHEXEWKBToGeometry(fields[8]));
            } catch (RuntimeException e) {
                logger.warn("can not parse shape for " + fields[8] + " : " + e);
                return;
            }
        }

        // country code
        if (!isEmptyField(fields, 4, false)) {
            street.setCountryCode(fields[4].trim());
        }

        // is_in
        if (!isEmptyField(fields, 5, false)) {
            street.setIsIn(fields[5].trim());
        }
        if (shouldFillIsInField()) {
            //we try to process is_in fields, because we want to fill adm and zip too
            setIsInFields(street);
        }

        long generatedId = idGenerator.getNextGId();
        street.setGid(Long.valueOf(generatedId));

        // street type : unknown values fall back to UNCLASSIFIED
        if (!isEmptyField(fields, 6, false)) {
            try {
                // Locale.ROOT keeps the enum lookup independent of the JVM default locale
                // (e.g. the Turkish dotted/dotless i would otherwise break valueOf)
                street.setStreetType(StreetType.valueOf(fields[6].trim().toUpperCase(Locale.ROOT)));
            } catch (Exception e) {
                logger.warn("can not determine streetType for " + fields[1] + " : " + e);
                street.setStreetType(StreetType.UNCLASSIFIED);
            }
        }

        // one way : anything but "true" (case-insensitive, surrounding blanks ignored) means false
        if (!isEmptyField(fields, 7, false)) {
            street.setOneWay(Boolean.parseBoolean(fields[7].trim()));
        }

        // alternate names : the tenth column is optional
        if (fields.length == 10 && !isEmptyField(fields, 9, false)) {
            populateAlternateNames(street, fields[9]);
        }

        try {
            openStreetMapDao.save(street);
        } catch (ConstraintViolationException e) {
            logger.error("Can not save " + dumpFields(fields)
                    + "(ConstraintViolationException) we continue anyway but you should consider this", e);
        } catch (Exception e) {
            logger.error("Can not save " + dumpFields(fields) + " we continue anyway but you should consider this",
                    e);
        }
    }

    /**
     * Adds to a street the alternate names contained in a "___"-separated string.
     * <p>
     * Each extracted entry is further split on ';', ':' and ','; every non-blank part is
     * trimmed and added as an {@link AlternateOsmName} with the OPENSTREETMAP source. When the
     * street has no name yet, the first non-blank entry becomes its name instead of an
     * alternate name.
     *
     * @param street the street to fill; when null nothing is done
     * @param alternateNamesAsString the raw alternate names; when null nothing is done
     * @return the street that was passed in (possibly null)
     */
    OpenStreetMap populateAlternateNames(OpenStreetMap street, String alternateNamesAsString) {
        if (street == null || alternateNamesAsString == null) {
            return street;
        }
        Matcher matcher = ALTERNATENAMES_EXTRACTION_PATTERN.matcher(alternateNamesAsString);
        int position = 0;
        while (matcher.find()) {
            position++;
            if (matcher.groupCount() != 1) {
                logger.warn("wrong number of fields for street alternatename no " + position + " for line "
                        + alternateNamesAsString);
                continue;
            }
            String alternateName = matcher.group(1);
            if (alternateName == null || "".equals(alternateName.trim())) {
                continue;
            }
            if (street.getName() == null) {
                street.setName(alternateName);
                continue;
            }
            // splitting a value without separator yields the value itself, so one loop covers both cases
            for (String name : alternateName.split("[;\\:,]")) {
                String trimmedName = name.trim();
                if (trimmedName.length() > 0) {
                    // blank parts (e.g. "a;;b" or "a; ;b") must not become empty alternate names
                    street.addAlternateName(new AlternateOsmName(trimmedName, AlternateNameSource.OPENSTREETMAP));
                }
            }
        }
        return street;
    }

    /**
     * Fills the "is in" data of a street (is_in, city id, population, zip codes, city
     * alternate names, administrative names and is_in_place) from the cities found around its
     * location. Nothing is done when the street or its location is null.
     * <ol>
     * <li>If {@code cityDao.getByShape} finds a city for the location, its data is copied, the
     * street is flagged as city-confident, the subdivision found by
     * {@code citySubdivisionDao.getByShape} (if any) gives is_in_place, and the method
     * returns.</li>
     * <li>Otherwise the nearest city found with the municipality filter provides population,
     * administrative names, zip codes and alternate names; is_in and the city id are only set
     * when is_in is still empty.</li>
     * <li>Then the nearest city found without the municipality filter is looked up. It is
     * ignored when it has the same feature id as the previous one or when it is farther from
     * the street. Otherwise it provides is_in (if still empty) or is_in_place, the
     * administrative names, additional zip codes, the population when none is set yet, and
     * the alternate names when no city was found at the previous step.</li>
     * </ol>
     *
     * @param street the street to fill
     */
    protected void setIsInFields(OpenStreetMap street) {
        if (street == null || street.getLocation() == null) {
            return;
        }
        //first search By Shape because it is the more reliable :
        City cityByShape = cityDao.getByShape(street.getLocation(), street.getCountryCode(), true);
        if (cityByShape != null) {
            street.setIsIn(cityByShape.getName());
            street.setCityId(cityByShape.getId());
            street.setCityConfident(true);
            street.setPopulation(cityByShape.getPopulation());
            copyZipCodes(street, cityByShape);
            copyCityAlternateNames(street, cityByShape);
            street.setIsInAdm(getDeeperAdmName(cityByShape));
            setAdmNames(street, cityByShape);
            //set the is_in_place
            CitySubdivision subdivision = citySubdivisionDao.getByShape(street.getLocation(),
                    cityByShape.getCountryCode());
            if (subdivision != null) {
                street.setIsInPlace(subdivision.getName());
            }
            return;
        }

        City city = getNearestCity(street.getLocation(), street.getCountryCode(), true);
        if (city != null) {
            street.setPopulation(city.getPopulation());
            street.setIsInAdm(getDeeperAdmName(city));
            setAdmNames(street, city);
            copyZipCodes(street, city);
            if (city.getName() != null && street.getIsIn() == null) {//only if it has not be set by the openstreetmap is_in field
                //we can here have some concordance problem if the city found is not the one populate in the osm is_in fields.
                street.setIsIn(pplxToPPL(city.getName()));
                street.setCityId(city.getId());
            }
            copyCityAlternateNames(street, city);
        }

        City city2 = getNearestCity(street.getLocation(), street.getCountryCode(), false);
        if (city2 == null) {
            return;
        }
        if (city != null) {
            // compare the ids by value : '==' on boxed ids would compare references.
            // NOTE(review): assumes getFeatureId() yields a Long (boxed or primitive) - confirm.
            Long cityFeatureId = city.getFeatureId();
            Long city2FeatureId = city2.getFeatureId();
            boolean sameFeature = cityFeatureId == null ? city2FeatureId == null
                    : cityFeatureId.equals(city2FeatureId);
            if (sameFeature) {
                return;
            }
            if (city2.getLocation() != null && city.getLocation() != null
                    && GeolocHelper.distance(street.getLocation(), city2.getLocation()) > GeolocHelper
                            .distance(street.getLocation(), city.getLocation())) {
                return;
            }
        }
        //we got a non municipality that is nearest, we set isinPlace tag and update is_in if needed
        if (city2.getPopulation() != null && city2.getPopulation() != 0
                && (street.getPopulation() == null || street.getPopulation() == 0)) {
            street.setPopulation(city2.getPopulation());
        }
        if (street.getIsIn() == null) {
            street.setIsIn(pplxToPPL(city2.getName()));
            street.setCityId(city2.getId());
        } else {
            street.setIsInPlace(pplxToPPL(city2.getName()));
        }
        street.setIsInAdm(getDeeperAdmName(city2));
        setAdmNames(street, city2);
        //we merge the zipcodes for is_in and is_in_place, so we don't check if zipcodes are already filled
        copyZipCodes(street, city2);
        if (city == null) {//add AN only if there are not added yet
            copyCityAlternateNames(street, city2);
        }
    }

    /** Adds every non-null zip code of the city to the street. */
    private void copyZipCodes(OpenStreetMap street, City city) {
        if (city.getZipCodes() == null) {
            return;
        }
        for (ZipCode zip : city.getZipCodes()) {
            if (zip != null && zip.getCode() != null) {
                street.addZip(zip.getCode());
            }
        }
    }

    /** Adds every non-null alternate name of the city to the street's is_in alternate names. */
    private void copyCityAlternateNames(OpenStreetMap street, City city) {
        if (city.getAlternateNames() == null) {
            return;
        }
        for (AlternateName name : city.getAlternateNames()) {
            if (name != null && name.getName() != null) {
                street.addIsInCitiesAlternateName(name.getName());
            }
        }
    }

    /**
     * Copies the administrative names (levels 5 down to 1) of a city to a street. A level
     * that is null on the city leaves the street's current value untouched.
     *
     * @param street the street to update
     * @param city the city to read from; when null nothing is done
     */
    protected void setAdmNames(OpenStreetMap street, City city) {
        if (city == null) {
            return;
        }
        if (null != city.getAdm5Name()) {
            street.setAdm5Name(city.getAdm5Name());
        }
        if (null != city.getAdm4Name()) {
            street.setAdm4Name(city.getAdm4Name());
        }
        if (null != city.getAdm3Name()) {
            street.setAdm3Name(city.getAdm3Name());
        }
        if (null != city.getAdm2Name()) {
            street.setAdm2Name(city.getAdm2Name());
        }
        if (null != city.getAdm1Name()) {
            street.setAdm1Name(city.getAdm1Name());
        }
    }

    /**
     * Picks the name of the deepest administrative level of a city among levels 3, 2 and 1.
     * Levels 4 and 5 are deliberately ignored: they are considered too precise to reflect
     * the deeper adm.
     *
     * @param city the city to inspect, may be null
     * @return the first non-null name among adm3, adm2 and adm1 (in that order), or null when
     *         the city is null or none of them is set
     */
    protected String getDeeperAdmName(City city) {
        if (city == null) {
            return null;
        }
        String adm3 = city.getAdm3Name();
        if (adm3 != null) {
            return adm3;
        }
        String adm2 = city.getAdm2Name();
        if (adm2 != null) {
            return adm2;
        }
        // may itself be null, which is the "nothing found" answer
        return city.getAdm1Name();
    }

    /**
     * Asks the city DAO for the nearest city around a location, within {@link #DISTANCE}.
     *
     * @param location the point to search around; null yields null without calling the DAO
     * @param countryCode the country code passed to the DAO
     * @param filterMunicipality the municipality filter flag passed to the DAO
     * @return the DAO's answer, or null when the location is null
     */
    protected City getNearestCity(Point location, String countryCode, boolean filterMunicipality) {
        return location == null ? null
                : cityDao.getNearest(location, countryCode, filterMunicipality, DISTANCE);
    }

    /**
     * Strips the district part of a city name such as "Paris 10 Entrepot": such a place is
     * probably a pplx that is newly considered as ppl
     * (see http://forum.geonames.org/gforum/posts/list/2063.page).
     * <p>
     * When the name contains a word followed by a whitespace and a number, everything from
     * that whitespace on is dropped and the beginning of the name is returned in full, so
     * names with several words or non-ASCII letters are kept whole ("Saint Denis 02" gives
     * "Saint Denis", "Orl&eacute;ans 01" gives "Orl&eacute;ans").
     *
     * @param cityName the name to clean, may be null
     * @return the name without its numbered suffix, the name unchanged when it has no such
     *         suffix, or null when the name is null
     */
    protected String pplxToPPL(String cityName) {
        if (cityName == null) {
            return null;
        }
        Matcher matcher = pattern.matcher(cityName);
        if (!matcher.find()) {
            return cityName;
        }
        // group 1 only holds the last ASCII word before the number : keep the whole prefix
        // up to the end of that word instead of the word alone
        return cityName.substring(0, matcher.end(1)).trim();
    }

    /**
     * Tells whether this importer must be skipped.
     *
     * @return true when the OpenStreetMap importer is not enabled in the importer configuration
     * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldBeSkiped()
     */
    @Override
    public boolean shouldBeSkipped() {
        final boolean importerEnabled = this.importerConfig.isOpenstreetmapImporterEnabled();
        return !importerEnabled;
    }

    /**
     * Sets the flush mode of the street DAO and of the city DAO to {@link FlushMode#COMMIT}.
     *
     * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#setCommitFlushMode()
     */
    @Override
    protected void setCommitFlushMode() {
        final FlushMode commitMode = FlushMode.COMMIT;
        openStreetMapDao.setFlushMode(commitMode);
        cityDao.setFlushMode(commitMode);
    }

    /**
     * Tells whether comments must be ignored.
     *
     * @return always true
     * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreComments()
     */
    @Override
    protected boolean shouldIgnoreComments() {
        return Boolean.TRUE.booleanValue();
    }

    /**
     * Tells whether the first line of a file must be ignored.
     *
     * @return always false
     * @see com.gisgraphy.domain.geoloc.importer.AbstractImporterProcessor#shouldIgnoreFirstLine()
     */
    @Override
    protected boolean shouldIgnoreFirstLine() {
        return Boolean.FALSE.booleanValue();
    }

    /**
     * Deletes all the OpenStreetMap entities through the DAO and resets the importer status.
     *
     * @return a list holding one entry (simple name of the DAO's persistence class, number of
     *         deleted entities) when at least one entity was deleted, an empty list otherwise
     * @see com.gisgraphy.domain.geoloc.importer.IGeonamesProcessor#rollback()
     */
    public List<NameValueDTO<Integer>> rollback() {
        logger.info("deleting openstreetmap entities...");
        final int deletedCount = openStreetMapDao.deleteAll();
        final List<NameValueDTO<Integer>> deletionSummary = new ArrayList<NameValueDTO<Integer>>();
        if (deletedCount != 0) {
            String entityName = openStreetMapDao.getPersistenceClass().getSimpleName();
            deletionSummary.add(new NameValueDTO<Integer>(entityName, deletedCount));
        }
        logger.info(deletedCount + " openstreetmap entities have been deleted");
        resetStatus();
        return deletionSummary;
    }

    /**
     * Runs the parent tear down, switches the search engines' logging flags back off, then
     * creates the spatial indexes and calls the Solr synchroniser's {@code optimize()}. The
     * status message reflects the running step and is restored afterwards. Any exception
     * raised by these two steps is logged and ignored.
     */
    @Override
    //TODO test
    protected void tearDown() {
        super.tearDown();
        FullTextSearchEngine.disableLogging = false;
        GeolocSearchEngine.disableLogging = false;
        final String previousStatusMessage = this.statusMessage;
        try {
            this.statusMessage = internationalisationService.getString("import.message.createIndex");
            this.openStreetMapDao.createSpatialIndexes();
            this.statusMessage = internationalisationService.getString("import.fulltext.optimize");
            this.solRSynchroniser.optimize();
        } catch (Exception failure) {
            final String errorMessage = "an error occured during spatial index creation, we ignore it but you have to manually run it to have good performances : "
                    + failure.getMessage();
            logger.error(errorMessage, failure);
        } finally {
            // whatever happened, show again the message that was displayed before this step
            this.statusMessage = previousStatusMessage;
        }
    }

    /**
     * Overridden because the alternate names column can be missing, so a line may have
     * either 9 or 10 fields.
     *
     * @see #getNumberOfColumns()
     * @param fields
     *                The array to check
     * @throws WrongNumberOfFieldsException when the array has neither 9 nor 10 elements
     */
    @Override
    protected void checkNumberOfColumn(String[] fields) {
        final int fieldCount = fields.length;
        if (fieldCount == 9 || fieldCount == 10) {
            return;
        }
        String errorMessage = "The number of fields is not correct. expected : " + getNumberOfColumns()
                + ", founds :  " + fieldCount + ". details :" + dumpFields(fields);
        throw new WrongNumberOfFieldsException(errorMessage);
    }

    /**
     * Tells whether the is_in fields of the streets must be computed.
     *
     * @return true when the importer configuration has both the Geonames importer and the
     *         "fill is_in" option enabled
     */
    protected boolean shouldFillIsInField() {
        if (!importerConfig.isGeonamesImporterEnabled()) {
            return false;
        }
        return importerConfig.isOpenStreetMapFillIsIn();
    }

    /**
     * @param openStreetMapDao the street DAO to use
     */
    @Required
    public void setOpenStreetMapDao(IOpenStreetMapDao openStreetMapDao) {
        this.openStreetMapDao = openStreetMapDao;
    }

    /**
     * @param solRSynchroniser the Solr synchroniser to use
     */
    @Required
    public void setSolRSynchroniser(ISolRSynchroniser solRSynchroniser) {
        this.solRSynchroniser = solRSynchroniser;
    }

    /**
     * @param idGenerator the generator of the streets' generated ids
     */
    @Required
    public void setIdGenerator(IIdGenerator idGenerator) {
        this.idGenerator = idGenerator;
    }

    /**
     * @param municipalityDetector the municipality detector to use
     */
    @Required
    public void setMunicipalityDetector(IMunicipalityDetector municipalityDetector) {
        this.municipalityDetector = municipalityDetector;
    }

    /**
     * @param cityDao the city DAO to use
     */
    @Required
    public void setCityDao(ICityDao cityDao) {
        this.cityDao = cityDao;
    }

    /**
     * @param citySubdivisionDao the city subdivision DAO to use
     */
    @Required
    public void setCitySubdivisionDao(ICitySubdivisionDao citySubdivisionDao) {
        this.citySubdivisionDao = citySubdivisionDao;
    }

}