com.gisgraphy.importer.GeonamesAdmExtracter.java Source code

Java tutorial

Introduction

Here is the source code for com.gisgraphy.importer.GeonamesAdmExtracter.java

Source

/*******************************************************************************
 *   Gisgraphy Project 
 * 
 *   This library is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU Lesser General Public
 *   License as published by the Free Software Foundation; either
 *   version 2.1 of the License, or (at your option) any later version.
 * 
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *   Lesser General Public License for more details.
 * 
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
 * 
 *  Copyright 2008  Gisgraphy project 
 *  David Masclet <davidmasclet@gisgraphy.com>
 *  
 *  
 *******************************************************************************/
package com.gisgraphy.importer;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

import com.gisgraphy.domain.valueobject.Constants;
import com.gisgraphy.domain.valueobject.NameValueDTO;

// TODO v2 a factory and the ability to extract a specific featureclasscode
/**
 * Extracts the administrative divisions (Adm) of level 1 to 4 from the
 * Geonames country files into four separate files, one per level.
 * <p>
 * Every input line whose feature class is {@code A} and whose feature code is
 * {@code ADM<level>} is copied to the file of that level. The copied line keeps
 * all the tab separated columns of the input line (the name is trimmed and the
 * last adm code may be corrected by {@link ImporterHelper}) and is terminated
 * by {@code \r\n}.
 * <p>
 * Instances keep the open writers as state, so one instance must not be shared
 * between threads.
 * 
 * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a>
 */
public class GeonamesAdmExtracter extends AbstractSimpleImporterProcessor {

    protected static final Logger logger = LoggerFactory.getLogger(GeonamesAdmExtracter.class);

    /** Separator of the columns, for the lines that are read and written. */
    private static final String FIELD_SEPARATOR = "\t";

    /** Terminator appended to every line that is written. */
    private static final String LINE_SEPARATOR = "\r\n";

    /** Pattern of the timestamp that is added to the name of a backup file. */
    private static final String BACKUP_DATE_PATTERN = "yyyy-MM-dd-HH-mm:ss";

    private File adm1file;

    private File adm2file;

    private File adm3file;

    private File adm4file;

    /* A writer is null when the extraction of its level is skipped. */
    private OutputStreamWriter adm1fileOutputStreamWriter;

    private OutputStreamWriter adm2fileOutputStreamWriter;

    private OutputStreamWriter adm3fileOutputStreamWriter;

    private OutputStreamWriter adm4fileOutputStreamWriter;

    @Autowired
    private GeonamesAdm1Importer geonamesAdm1Importer;

    @Autowired
    private GeonamesAdm2Importer geonamesAdm2Importer;

    @Autowired
    private GeonamesAdm3Importer geonamesAdm3Importer;

    @Autowired
    private GeonamesAdm4Importer geonamesAdm4Importer;

    /**
     * Default Constructor
     */
    public GeonamesAdmExtracter() {
        super();
    }

    /**
     * Dispatches one line of a country file to the extraction of its Adm
     * level. Lines that are not an Adm of level 1 to 4 are ignored.
     * <p>
     * The columns of a line are : 0 geonameid, 1 name, 2 asciiname, 3
     * alternatenames, 4 latitude, 5 longitude, 6 feature class, 7 feature
     * code, 8 country code, 9 cc2, 10 admin1 code, 11 admin2 code, 12 admin3
     * code, 13 admin4 code, 14 population, 15 elevation, 16 gtopo30, 17
     * timezone, 18 modification date (yyyy-MM-dd).
     * 
     * @param line
     *                the raw, tab separated, line to process
     */
    @Override
    protected void processData(String line) {
        String[] fields = line.split(FIELD_SEPARATOR);
        checkNumberOfColumn(fields);

        if (isEmptyField(fields, 6, false) || isEmptyField(fields, 7, false)) {
            logger.info("featureid {} has featurecode or featureclass with a null value", fields[0]);
            return;
        }

        String featureClass = fields[6];
        String featureCode = fields[7];
        if (checkAdmTypeAndLevel(1, featureClass, featureCode)) {
            processAdm1ToGeonamesExportFormat(line);
        } else if (checkAdmTypeAndLevel(2, featureClass, featureCode)) {
            processAdm2ToGeonamesExportFormat(line);
        } else if (checkAdmTypeAndLevel(3, featureClass, featureCode)) {
            processAdm3ToGeonamesExportFormat(line);
        } else if (checkAdmTypeAndLevel(4, featureClass, featureCode)) {
            processAdm4ToGeonamesExportFormat(line);
        }
    }

    /**
     * @return true when the Geonames importer is disabled in the
     *         configuration
     */
    @Override
    public boolean shouldBeSkipped() {
        return !importerConfig.isGeonamesImporterEnabled();
    }

    /**
     * Does nothing : overridden because the country files must not be renamed
     * once they have been read by the extracter.
     */
    @Override
    protected void onFileProcessed(File file) {
        // we override because we don't want to rename files
    }

    /**
     * Closes the extracted files, then resets the number of lines to process
     * of the Adm importers whose file is not skipped.
     */
    @Override
    protected void tearDown() {
        try {
            super.tearDown();
        } finally {
            // the writers must be released even if the parent tear down fails
            closeOutputStreams();
        }
        // Force number of line to be processed after extract
        if (isNotSkipped(1)) {
            geonamesAdm1Importer.numberOfLinesToProcess = 0;
        }
        if (isNotSkipped(2)) {
            geonamesAdm2Importer.numberOfLinesToProcess = 0;
        }
        if (isNotSkipped(3)) {
            geonamesAdm3Importer.numberOfLinesToProcess = 0;
        }
        if (isNotSkipped(4)) {
            geonamesAdm4Importer.numberOfLinesToProcess = 0;
        }
    }

    /**
     * @param admLevel
     *                the level of the Adm (1 to 4)
     * @return true when the configured strategy for that level is not 'skip'
     */
    private boolean isNotSkipped(int admLevel) {
        return importerConfig.getAdmExtracterStrategyOptionsForAdm(admLevel) != AdmExtracterStrategyOptions.skip;
    }

    /**
     * Writes the line to the Adm4 file. The country code, the four adm codes
     * and the name are required.
     * 
     * @param line
     *                the raw line to extract
     */
    private void processAdm4ToGeonamesExportFormat(String line) {
        writeAdmLine(adm4fileOutputStreamWriter, 4, line, 8, 10, 11, 12, 13, 1);
    }

    /**
     * Writes the line to the Adm3 file. The country code, the adm1 to adm3
     * codes and the name are required.
     * 
     * @param line
     *                the raw line to extract
     */
    private void processAdm3ToGeonamesExportFormat(String line) {
        writeAdmLine(adm3fileOutputStreamWriter, 3, line, 8, 10, 11, 12, 1);
    }

    /**
     * Writes the line to the Adm2 file. The country code, the adm1 and adm2
     * codes, the name and the geonameid are required.
     * 
     * @param line
     *                the raw line to extract
     */
    private void processAdm2ToGeonamesExportFormat(String line) {
        writeAdmLine(adm2fileOutputStreamWriter, 2, line, 8, 10, 11, 1, 0);
    }

    /**
     * Writes the line to the Adm1 file. The country code, the adm1 code and
     * the name are required.
     * 
     * @param line
     *                the raw line to extract
     */
    private void processAdm1ToGeonamesExportFormat(String line) {
        writeAdmLine(adm1fileOutputStreamWriter, 1, line, 8, 10, 1);
    }

    /**
     * Writes one line to the file of an Adm level, if all the required columns
     * are filled. Nothing is done when the writer is null (level skipped).
     * 
     * @param writer
     *                the writer of the level, may be null
     * @param admLevel
     *                the level of the Adm, only used in the error message
     * @param line
     *                the raw line to extract
     * @param requiredFieldIndexes
     *                the indexes of the columns that must not be empty, checked
     *                in the given order; the index of the name (1) must be one
     *                of them because the name is trimmed afterwards
     * @throws RuntimeException
     *                 if the line can not be written
     */
    private void writeAdmLine(OutputStreamWriter writer, int admLevel, String line, int... requiredFieldIndexes) {
        if (writer == null) {
            return;
        }
        String[] fields = ImporterHelper.correctLastAdmCodeIfPossible(line.split(FIELD_SEPARATOR));
        for (int index : requiredFieldIndexes) {
            if (isEmptyField(fields, index, true)) {
                return;
            }
        }
        // trim name
        fields[1] = fields[1].trim();
        try {
            writer.write(StringUtils.join(fields, FIELD_SEPARATOR) + LINE_SEPARATOR);
            flushAndClear();
        } catch (IOException e) {
            throw new RuntimeException("an error has occurred when writing in adm" + admLevel + " file", e);
        }
    }

    /**
     * Closes the four writers. Every writer is closed even if closing a
     * previous one fails.
     * 
     * @throws RuntimeException
     *                 for the first writer that could not be closed, the other
     *                 failures are logged
     */
    private void closeOutputStreams() {
        OutputStreamWriter[] writers = { adm1fileOutputStreamWriter, adm2fileOutputStreamWriter,
                adm3fileOutputStreamWriter, adm4fileOutputStreamWriter };
        RuntimeException firstFailure = null;
        for (int i = 0; i < writers.length; i++) {
            if (writers[i] == null) {
                continue;
            }
            try {
                writers[i].close();
            } catch (IOException e) {
                String message = "can not close adm" + (i + 1) + " outputStream";
                if (firstFailure == null) {
                    firstFailure = new RuntimeException(message, e);
                } else {
                    logger.warn(message, e);
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * @param expectedLevel
     *                the level to check
     * @param featureClass
     *                the feature class of the line
     * @param featureCode
     *                the feature code of the line
     * @return true if the feature class is 'A' and the feature code starts
     *         with 'ADM' and ends with the expected level
     */
    private boolean checkAdmTypeAndLevel(int expectedLevel, String featureClass, String featureCode) {
        return featureClass.equals("A") && featureCode.startsWith("ADM")
                && featureCode.endsWith(String.valueOf(expectedLevel));
    }

    /**
     * Opens the writer of an Adm file according to the configured strategy.
     * A file that does not exist yet, or whose strategy is 'reprocess', is
     * written directly; with 'backup' the existing file is renamed first;
     * otherwise the level is skipped.
     * 
     * @param file
     *                the file to write
     * @param admLevel
     *                the level of the Adm (1 to 4)
     * @return the writer, or null if the level is skipped or if the charset is
     *         not supported
     * @throws FileNotFoundException
     *                 if the file can not be opened for writing
     */
    private OutputStreamWriter getWriter(File file, int admLevel) throws FileNotFoundException {
        File target;
        if (!file.exists()) {
            target = file;
        } else {
            AdmExtracterStrategyOptions strategy = importerConfig.getAdmExtracterStrategyOptionsForAdm(admLevel);
            if (strategy == AdmExtracterStrategyOptions.reprocess) {
                target = file;
            } else if (strategy == AdmExtracterStrategyOptions.backup) {
                target = createFileAndBackupIfAlreadyExists(file);
            } else {
                // skip
                return null;
            }
        }

        OutputStream out = new BufferedOutputStream(new FileOutputStream(target));
        try {
            return new OutputStreamWriter(out, Constants.CHARSET);
        } catch (UnsupportedEncodingException e) {
            logger.warn("UnsupportedEncodingException for " + Constants.CHARSET + " : Can not extract Data");
            // the stream is already opened and nobody else will close it
            try {
                out.close();
            } catch (IOException closeFailure) {
                logger.warn("can not close the outputStream of " + target.getName(), closeFailure);
            }
            return null;
        }
    }

    /**
     * @param fileName
     *                the name of a file
     * @return the file with that name in the Geonames directory
     */
    private File admFile(String fileName) {
        return new File(importerConfig.getGeonamesDir() + fileName);
    }

    /**
     * Resolves the four Adm files and opens their writers.
     * 
     * @throws RuntimeException
     *                 if one of the files can not be opened, the writers that
     *                 are already opened are closed first
     */
    private void initFiles() {
        adm1file = admFile(importerConfig.getAdm1FileName());
        adm2file = admFile(importerConfig.getAdm2FileName());
        adm3file = admFile(importerConfig.getAdm3FileName());
        adm4file = admFile(importerConfig.getAdm4FileName());
        try {
            adm1fileOutputStreamWriter = getWriter(adm1file, 1);
            adm2fileOutputStreamWriter = getWriter(adm2file, 2);
            adm3fileOutputStreamWriter = getWriter(adm3file, 3);
            adm4fileOutputStreamWriter = getWriter(adm4file, 4);
        } catch (FileNotFoundException e) {
            closeOutputStreams();
            throw new RuntimeException("An error has occurred during creation of outpuStream : " + e.getMessage(),
                    e);
        }
    }

    /**
     * Renames the file, if it exists, to a timestamped '.bkup' file and
     * creates a new empty file with the same name in the Geonames directory.
     * 
     * @param file
     *                the file to backup and create
     * @return the newly created file
     * @throws ImporterException
     *                 if the file is null or if the existing file can not be
     *                 renamed (going on would overwrite the data to backup)
     * @throws RuntimeException
     *                 if the file is not writable or can not be created
     */
    private File createFileAndBackupIfAlreadyExists(File file) {
        if (file == null) {
            throw new ImporterException("Can not create or backup a null File ");
        }

        if (file.exists()) {
            checkWriteRights(file);
            // rename
            logger.info("File " + file.getName() + " already exists and will be renamed ");
            String timestamp = new SimpleDateFormat(BACKUP_DATE_PATTERN).format(new Date());
            File backup = admFile(file.getName() + "-" + timestamp + ".bkup");
            if (!file.renameTo(backup)) {
                throw new ImporterException("Can not backup " + file.getAbsolutePath() + " to "
                        + backup.getAbsolutePath());
            }
        }
        try {
            // create
            file = admFile(file.getName());
            file.createNewFile();
            checkWriteRights(file);
        } catch (IOException e) {
            throw new RuntimeException("An error has occurred during the creation of file "
                    + importerConfig.getGeonamesDir() + file.getName(), e);
        }
        return file;
    }

    /**
     * @param file
     *                the file to check
     * @throws RuntimeException
     *                 if the file is not writable
     */
    private void checkWriteRights(File file) {
        if (!file.canWrite()) {
            throw new RuntimeException(
                    "you must have write rights in order to export adm in file " + file.getAbsolutePath());
        }
    }

    /**
     * Runs the parent setup, then opens the files to extract.
     */
    @Override
    public void setup() {
        super.setup();
        initFiles();
    }

    /**
     * @return false
     */
    @Override
    protected boolean shouldIgnoreFirstLine() {
        return false;
    }

    /**
     * @return true
     */
    @Override
    protected boolean shouldIgnoreComments() {
        return true;
    }

    /**
     * Does nothing.
     */
    @Override
    protected void setCommitFlushMode() {
        return;
    }

    /**
     * Flushes the writers of the four Adm files.
     * 
     * @throws RuntimeException
     *                 if a writer can not be flushed, all the writers are
     *                 closed first
     */
    @Override
    protected void flushAndClear() {
        flushWriter(adm1fileOutputStreamWriter, "adm1file");
        flushWriter(adm2fileOutputStreamWriter, "adm2file");
        flushWriter(adm3fileOutputStreamWriter, "adm3file");
        flushWriter(adm4fileOutputStreamWriter, "adm4 file");
    }

    /**
     * Flushes one writer, if it is not null.
     * 
     * @param writer
     *                the writer to flush, may be null
     * @param label
     *                the name of the file in the error message
     */
    private void flushWriter(OutputStreamWriter writer, String label) {
        if (writer == null) {
            return;
        }
        try {
            writer.flush();
        } catch (IOException e) {
            closeOutputStreams();
            throw new RuntimeException("can not flush " + label + " : " + e.getMessage(), e);
        }
    }

    /**
     * @return 19, the number of columns of a line (see
     *         {@link #processData(String)})
     */
    @Override
    protected int getNumberOfColumns() {
        return 19;
    }

    /**
     * @return the country files of the Geonames directory, as listed by
     *         {@link ImporterHelper}
     */
    @Override
    protected File[] getFiles() {
        return ImporterHelper.listCountryFilesToImport(importerConfig.getGeonamesDir());
    }

    /**
     * Deletes the four extracted files and resets the status.
     * 
     * @return one entry per file, with the name of the file and 1 if it has
     *         been deleted, 0 otherwise
     */
    public List<NameValueDTO<Integer>> rollback() {
        List<NameValueDTO<Integer>> deletedObjectInfo = new ArrayList<NameValueDTO<Integer>>();
        adm1file = admFile(importerConfig.getAdm1FileName());
        deleteFile(adm1file, deletedObjectInfo);
        adm2file = admFile(importerConfig.getAdm2FileName());
        deleteFile(adm2file, deletedObjectInfo);
        adm3file = admFile(importerConfig.getAdm3FileName());
        deleteFile(adm3file, deletedObjectInfo);
        adm4file = admFile(importerConfig.getAdm4FileName());
        deleteFile(adm4file, deletedObjectInfo);
        resetStatus();
        return deletedObjectInfo;
    }

    /**
     * Deletes the file and records the result.
     * 
     * @param file
     *                the file to delete
     * @param deletedObjectInfo
     *                the list where the result is added : the name of the file
     *                with 1 if it has been deleted, 0 otherwise
     */
    private void deleteFile(File file, List<NameValueDTO<Integer>> deletedObjectInfo) {
        boolean deleted = file.delete();
        deletedObjectInfo.add(new NameValueDTO<Integer>(file.getName(), deleted ? 1 : 0));
        if (deleted) {
            logger.info("File " + file.getName() + " has been deleted");
        } else {
            logger.info("File " + file.getName() + " has not been deleted");
        }
    }

}