org.ala.hbase.SpecimenHoldingLoader.java Source code

Java tutorial

Introduction

Here is the source code for org.ala.hbase.SpecimenHoldingLoader.java

Source

/***************************************************************************
 * Copyright (C) 2010 Atlas of Living Australia
 * All Rights Reserved.
 *
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 ***************************************************************************/
package org.ala.hbase;

import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.inject.Inject;
import org.ala.dao.InfoSourceDAO;
import org.ala.dao.TaxonConceptDao;
import org.ala.model.InfoSource;
import org.ala.model.SpecimenHolding;
import org.ala.util.SpringUtils;
import org.apache.log4j.Logger;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Component;

import au.com.bytecode.opencsv.CSVReader;
import au.org.ala.checklist.lucene.model.NameSearchResult;

/**
 * Loads specimen holding records from a Botanical Gardens CSV file and stores
 * them against the matching taxon concept through {@link TaxonConceptDao}.
 *
 * <p>The CSV columns are expected in the order declared by
 * {@link BOTANTICAL_GARDENS_IDX}. The first line of the file is treated as a
 * header and skipped. Consecutive rows that resolve to the same guid are
 * stored together in a single DAO call.
 *
 * @author MOK011
 */

@Component("specimenHoldingLoader")
public class SpecimenHoldingLoader {
    protected static Logger logger = Logger.getLogger(SpecimenHoldingLoader.class);

    /**
     * Directory prefix prepended to the file name given on the command line
     * (despite the constant's name this is a directory, not a file).
     */
    private static final String INPUT_FILE_NAME = "/data/bie-staging/specimenHolding/";

    @Inject
    protected InfoSourceDAO infoSourceDao;
    @Inject
    protected TaxonConceptDao taxonConceptDao;

    /**
     * Column layout of the CSV file: the ordinal of each constant is the
     * zero-based index of the corresponding column. The (misspelled) names are
     * public and therefore kept as they are.
     */
    public static enum BOTANTICAL_GARDENS_IDX {
        URL, INSTITUTION, SITE_NAME, FAMILY, GENUS, HYBRID_Q, SPECIES, SCIENCETIFIC_NAME, INFRASPECIFIC_Q, INFRASPECIFIC_NAME, CULTIVAR, COMMON_NAME, NOTES, COUNT, UNKNOWN
    }

    /**
     * Usage: inputFileName [append]
     *
     * <p>{@code inputFileName} is resolved relative to the staging directory.
     * {@code append} is parsed with {@link Boolean#parseBoolean(String)} and
     * defaults to {@code true}; when {@code false} the existing specimen
     * holdings of a taxon are overwritten instead of appended to.
     *
     * <p>Exits with status 1 when the file name is missing or the load fails,
     * and with status 0 otherwise.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.out.println("Input File Name Missing ....");
            // a usage error must not be reported as success
            System.exit(1);
        }
        System.out.println("Starting SpecimenHoldingLoader process.....");
        ApplicationContext context = SpringUtils.getContext();
        SpecimenHoldingLoader l = context.getBean(SpecimenHoldingLoader.class);
        try {
            System.out.println("Starting load process.....");
            boolean append = args.length > 1 ? Boolean.parseBoolean(args[1]) : true;
            l.load(args[0], append);
            System.out.println("load process finished.....");
        } catch (Exception e) {
            logger.error("Specimen holding load failed", e);
            System.err.println("load process failed: " + e);
            System.exit(1);
        }
        System.exit(0);
    }

    /**
     * Reads the CSV file and stores its rows, grouped by guid.
     *
     * <p>Rows are processed in file order. A run of consecutive rows that
     * resolve to the same guid forms one group; within a group a row is dropped
     * when it equals the previously kept row. A row that cannot be resolved
     * (see {@link #toSpecimenHolding(String[])}) is skipped and ends the
     * current group. Each row is resolved exactly once.
     *
     * <p>I/O errors are logged and end the load; the number of records that
     * were actually stored is always printed.
     *
     * @param fileName name of the CSV file inside the staging directory
     * @param append   {@code true} to append to the holdings already stored for
     *                 a guid, {@code false} to overwrite them
     */
    private void load(String fileName, boolean append) {
        int ctr = 0;
        CSVReader reader = null;

        try {
            reader = new CSVReader(new FileReader(INPUT_FILE_NAME + fileName));
            // ignore first header line
            reader.readNext();

            List<SpecimenHolding> group = new ArrayList<SpecimenHolding>();
            SpecimenHolding last = null;
            String[] row;
            while ((row = reader.readNext()) != null) {
                SpecimenHolding holding = toSpecimenHolding(row);
                logger.debug("*** SCIENCETIFIC_NAME: " + column(row, BOTANTICAL_GARDENS_IDX.SCIENCETIFIC_NAME)
                        + ", guid: " + (holding == null ? "" : holding.getIdentifier()));

                boolean sameGroup = holding != null && last != null
                        && last.getIdentifier().equals(holding.getIdentifier());
                if (!sameGroup) {
                    // guid changed (or the row is unusable): flush what was collected so far
                    ctr += storeGroup(group, append);
                    group = new ArrayList<SpecimenHolding>();
                    last = null;
                }
                // skip a row that merely repeats the previously kept one
                if (holding != null && (last == null || !last.equals(holding))) {
                    group.add(holding);
                    last = holding;
                }
            }
            // flush the trailing group
            ctr += storeGroup(group, append);
        } catch (IOException e) {
            logger.error("Unable to read specimen holding file " + INPUT_FILE_NAME + fileName, e);
        } finally {
            System.out.println("*** Total Records Updated: " + ctr);
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    logger.error("Unable to close specimen holding file " + INPUT_FILE_NAME + fileName, e);
                }
            }
        }
    }

    /**
     * Stores one group of holdings that share a guid.
     *
     * @param group  holdings to store; the guid is taken from the first element
     * @param append {@code true} to append to the stored holdings,
     *               {@code false} to overwrite them
     * @return number of records stored: {@code group.size()} on success,
     *         {@code 0} when the group is empty, has no guid, or the DAO call failed
     */
    private int storeGroup(List<SpecimenHolding> group, boolean append) {
        if (group.isEmpty()) {
            return 0;
        }
        String guid = group.get(0).getIdentifier();
        if (guid == null || guid.length() == 0) {
            return 0;
        }
        try {
            if (append) {
                // append the list into existing json.
                taxonConceptDao.appendSpecimenHoldings(guid, group);
            } else {
                // overwrite the existing json.
                taxonConceptDao.addSpecimenHoldings(guid, group);
            }
            return group.size();
        } catch (Exception e) {
            // best effort: a failing group must not stop the remaining rows
            logger.error("Unable to store " + group.size() + " specimen holding(s) for guid " + guid, e);
            return 0;
        }
    }

    /**
     * Returns the trimmed value of a CSV column, or an empty string when the
     * row is too short to contain that column (some rows in the data file have
     * fewer columns than the header).
     *
     * @param row one parsed CSV line
     * @param idx column to read
     * @return trimmed column value, never {@code null}
     */
    private static String column(String[] row, BOTANTICAL_GARDENS_IDX idx) {
        int i = idx.ordinal();
        if (i >= row.length || row[i] == null) {
            return "";
        }
        return row[i].trim();
    }

    /**
     * Converts one CSV row into a {@link SpecimenHolding}.
     *
     * <p>The guid is looked up by the SCIENCETIFIC_NAME column. When that
     * column is blank, a name is built from GENUS and CULTIVAR (the cultivar is
     * wrapped in single quotes unless it already starts with one) and the row
     * is only accepted if that name resolves to a result of rank "Genus".
     * The info source is looked up by the URL column.
     *
     * @param data csv data line; may have fewer or more columns than
     *             {@link BOTANTICAL_GARDENS_IDX} declares
     * @return the populated holding, or {@code null} when no guid or no info
     *         source could be found for the row
     */
    private SpecimenHolding toSpecimenHolding(String[] data) {
        String scientificName = column(data, BOTANTICAL_GARDENS_IDX.SCIENCETIFIC_NAME);
        String url = column(data, BOTANTICAL_GARDENS_IDX.URL);
        String guid;

        // get guid & infosource
        if (scientificName.length() == 0) {
            String genus = column(data, BOTANTICAL_GARDENS_IDX.GENUS);
            String cultivar = column(data, BOTANTICAL_GARDENS_IDX.CULTIVAR);
            if (genus.length() == 0) {
                // neither a scientific name nor a genus: nothing to resolve (e.g. a blank line)
                logger.debug("*** row has no scientific name and no genus, skipped");
                return null;
            }
            String cultivarName;
            if (cultivar.startsWith("'")) {
                cultivarName = genus + " " + cultivar;
            } else {
                cultivarName = genus + " '" + cultivar + "'";
            }
            try {
                NameSearchResult rs = taxonConceptDao.findCBDataByName(cultivarName, null, null);
                logger.debug(
                        "*** findCBDataByName(SCIENCETIFIC_NAME): " + cultivarName + ", NameSearchResult: " + rs);
                if (rs == null || rs.getRank() == null || !"Genus".equalsIgnoreCase(rs.getRank().name())) {
                    return null;
                }
                guid = rs.getLsid();
            } catch (Exception e) {
                logger.error("Name lookup failed for '" + cultivarName + "'", e);
                return null;
            }
        } else {
            guid = taxonConceptDao.findLsidByName(scientificName);
        }

        InfoSource infosource = infoSourceDao.getByUri(url);
        logger.debug("guid: " + guid + ", infosourceId: " + infosource);

        if (guid == null || guid.length() == 0 || infosource == null) {
            if (infosource == null) {
                logger.warn("Unable to find infosource : " + url);
            } else {
                logger.warn("Unable to find LSID for '" + scientificName + "'");
            }
            return null;
        }

        SpecimenHolding o = new SpecimenHolding();
        o.setInfoSourceId("" + infosource.getId());
        o.setInfoSourceName(infosource.getName());
        o.setInfoSourceURL(url);
        o.setIdentifier(guid);

        o.setUrl(url);
        o.setInstitutionName(column(data, BOTANTICAL_GARDENS_IDX.INSTITUTION));
        o.setSiteName(column(data, BOTANTICAL_GARDENS_IDX.SITE_NAME));
        o.setFamily(column(data, BOTANTICAL_GARDENS_IDX.FAMILY));
        o.setGenus(column(data, BOTANTICAL_GARDENS_IDX.GENUS));
        o.setHybirdQualifier(column(data, BOTANTICAL_GARDENS_IDX.HYBRID_Q));
        o.setSpecies(column(data, BOTANTICAL_GARDENS_IDX.SPECIES));
        o.setScientificName(scientificName);
        o.setInfraspecificQualifier(column(data, BOTANTICAL_GARDENS_IDX.INFRASPECIFIC_Q));
        o.setInfraspecificName(column(data, BOTANTICAL_GARDENS_IDX.INFRASPECIFIC_NAME));
        o.setCultivar(column(data, BOTANTICAL_GARDENS_IDX.CULTIVAR));
        o.setCommonName(column(data, BOTANTICAL_GARDENS_IDX.COMMON_NAME));
        o.setNotes(column(data, BOTANTICAL_GARDENS_IDX.NOTES));

        String count = column(data, BOTANTICAL_GARDENS_IDX.COUNT);
        if (count.length() > 0) {
            try {
                o.setCount(Integer.parseInt(count));
            } catch (NumberFormatException e) {
                // one malformed cell must not abort the whole load; the count is left unset
                logger.warn("Ignoring non-numeric COUNT '" + count + "' for guid " + guid);
            }
        }
        return o;
    }
}