eu.sisob.uma.NPL.Researchers.Data.ViewCreator_CSVandSheets.java Source code

Java tutorial

Introduction

Here is the source code for eu.sisob.uma.NPL.Researchers.Data.ViewCreator_CSVandSheets.java

Source

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
/*
Copyright (c) 2014 "(IA)2 Research Group. Universidad de Mlaga"
                    http://iaia.lcc.uma.es | http://www.uma.es
This file is part of SISOB Data Extractor.
SISOB Data Extractor is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SISOB Data Extractor is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SISOB Data Extractor. If not, see <http://www.gnu.org/licenses/>.
*/

package eu.sisob.uma.NPL.Researchers.Data;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import eu.sisob.uma.NPL.Researchers.ProjectLogger;
import eu.sisob.uma.api.prototypetextmining.globals.CVItemExtracted;
import eu.sisob.uma.api.prototypetextmining.globals.DataExchangeLiterals;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import javax.swing.table.DefaultTableModel;
import javax.swing.table.TableModel;
import org.jopendocument.dom.spreadsheet.Sheet;
import org.jopendocument.dom.spreadsheet.SpreadSheet;

/**
 *
 *** @author Daniel Lpez Gonzlez (dlopezgonzalez@gmail.com) for the SISOB PROJECT (http://sisob.lcc.uma.es/)
 */
public class ViewCreator_CSVandSheets {

    final static char csv_separator = ',';
    final static String agent_identification_file = "AgentIdentification.csv";
    final static String professional_activity_file = "ProfessionalActivity.csv";
    final static String accredited_university_studies_file = "AccreditedUniversityStudies.csv";
    final static String spreadsheet_w_all_data_file = "_DataExtracted.ods";

    /*
     * Auxiliar class to keep the csv data.
     */
    private static class ViewsExporterUnit {
        public ViewsExporterUnit(String tag, String filepath) {
            this.cache = new ArrayList<String[]>();
            this.map = new LinkedHashMap<String, String>();
            this.writer = null;
            this.tag = tag;
            this.filepath = filepath;
            this.lines = 0;
        }

        public void createMapIndex() {
            map_index = new LinkedHashMap<String, Integer>();
            int i = 0;
            for (String key : this.map.keySet()) {
                map_index.put(key, i + 1);
                i++;
            }
        }

        public void createNewCsv() throws IOException {
            OutputStreamWriter fw1 = new OutputStreamWriter(new FileOutputStream(filepath), "UTF-8");
            writer = new CSVWriter(fw1, csv_separator);
            String line[] = new String[map_index.size() + 1];

            line[0] = "ID";
            for (String key : this.map.keySet()) {
                line[map_index.get(key)] = map.get(key);
            }
            writer.writeNext(line);
            this.lines += 1;
        }

        public void AddCsv(String[] line) {
            writer.writeNext(line);
            this.lines += 1;
        }

        public void UpdateCsv(String id, String[] new_line) {
            String[] line_searched = null;
            for (String[] line : cache) {
                if (line[0].equals(id)) {
                    line_searched = line;
                    break;
                }
            }

            if (line_searched != null) {
                for (int i = 0; i < line_searched.length; i++) {
                    if (line_searched[i] == null || line_searched[i].equals(""))
                        line_searched[i] = new_line[i];
                }
            } else {
                cache.add(new_line);
            }
        }

        public String[] createNewLine(String id, LinkedHashMap<String, String> values) {
            String[] line = new String[map_index.size() + 1];
            line[0] = id;
            for (String value_name : values.keySet()) {
                if (map_index.containsKey(value_name))
                    line[map_index.get(value_name)] = values.get(value_name);
            }
            return line;
        }

        public void closeCsv() throws IOException {
            for (String[] line : cache) {
                writer.writeNext(line);
                this.lines += 1;
            }
            writer.close();
        }

        List<String[]> cache;
        LinkedHashMap<String, String> map;
        LinkedHashMap<String, Integer> map_index;

        CSVWriter writer;
        String filepath;
        String tag;
        int lines;
    }

    /*
     * Create a set of CSV of researcher cv data, also can create a spreadsheet file with jopendocument in the destination folder given
     * @param document xml document with data extracted with gate (researcher cv data)
     */
    /**
     *
     * @param document
     * @param dest
     * @param create_spreadsheet
     * @param debug_mode
     */
    public static void createViewFilesFromDataExtracted(org.dom4j.Document document, File dest,
            boolean create_spreadsheet, boolean debug_mode) {
        List<ViewsExporterUnit> units = new ArrayList<ViewsExporterUnit>();
        ViewsExporterUnit new_unit = null;
        //new_unit = new ViewsExporterUnit("AgentIdentification", dest.getAbsolutePath() + File.separator + agent_identification_file);
        new_unit = new ViewsExporterUnit(CVItemExtracted.AgentIdentification.class.getSimpleName(),
                dest.getAbsolutePath() + File.separator + agent_identification_file);

        //Put from XML. Remove "get"s and think that richer data machine will put new fields to xml                
        new_unit.map.put(CVItemExtracted.AgentIdentification.FirstFamilyName, "First Last Name"); //  "FirstFamilyName"
        new_unit.map.put(CVItemExtracted.AgentIdentification.SecondFamilyName, "Second Last Name");
        new_unit.map.put(CVItemExtracted.AgentIdentification.GivenName, "First Name");
        new_unit.map.put(CVItemExtracted.AgentIdentification.Gender, "Gender");
        new_unit.map.put(CVItemExtracted.AgentIdentification.Nationality, "Nationality");
        new_unit.map.put(CVItemExtracted.AgentIdentification.BirthCity, "Birth City");
        new_unit.map.put(CVItemExtracted.AgentIdentification.BirthRegion, "Birth Region");
        new_unit.map.put(CVItemExtracted.AgentIdentification.BirthCountry, "Birth Country");
        new_unit.map.put(CVItemExtracted.AgentIdentification.BirthDateDayMonthYear, "Birthday Day");
        new_unit.map.put(CVItemExtracted.AgentIdentification.BirthDateMonthYear, "Birthday Month");
        new_unit.map.put(CVItemExtracted.AgentIdentification.BirthDateYear, "Birthday Year");
        new_unit.map.put(CVItemExtracted.AgentIdentification.Email, "Email");
        new_unit.map.put(CVItemExtracted.AgentIdentification.Phone, "Phone");
        new_unit.createMapIndex();
        try {
            new_unit.createNewCsv();
        } catch (IOException ex) {
            ProjectLogger.LOGGER.error(ex.getMessage());
            return;
        }

        units.add(new_unit);
        //new_unit = new ViewsExporterUnit("ProfessionalActivity", dest.getAbsolutePath() + File.separator + professional_activity_file);        
        new_unit = new ViewsExporterUnit(CVItemExtracted.ProfessionalActivity.class.getSimpleName(),
                dest.getAbsolutePath() + File.separator + professional_activity_file);

        new_unit.map.put(CVItemExtracted.ProfessionalActivity.Title_name, "Literal Position Name");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.Position, "Position Name"); //MISS IN GATE
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.PositionNumber, "Position Number");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.DateInit_dayMonthYear, "Start Date Day");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.DateInit_monthYear, "Start Date Month");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.DateInit_year, "Start Date Year");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.DateInit_duration, "Duration");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.Entity1_entityName, "Entity 1");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.Entity2_entityName, "Entity 2");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.Entity3_entityName, "Entity 3");
        //NEW
        //new_unit.map.put(CVItemExtracted.ProfessionalActivity., "Complete Instituion (e1+e2+e3)");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.PlaceJob_city, "City");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.PlaceJob_regionName, "Region");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.PlaceJob_regionCode, "Region Code");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.PlaceJob_countryName, "Country");
        new_unit.map.put(CVItemExtracted.ProfessionalActivity.PlaceJob_countryCode, "Country Code");
        new_unit.createMapIndex();
        try {
            new_unit.createNewCsv();
        } catch (IOException ex) {
            ProjectLogger.LOGGER.error(ex.getMessage());
            return;
        }

        units.add(new_unit);
        //new_unit = new ViewsExporterUnit("AccreditedUniversityStudies", dest.getAbsolutePath() + File.separator + accredited_university_studies_file);
        new_unit = new ViewsExporterUnit(CVItemExtracted.AccreditedUniversityStudies.class.getSimpleName(),
                dest.getAbsolutePath() + File.separator + accredited_university_studies_file);

        //NEW        
        //new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies, "Type Degree");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.Title_name, "Literal Study Name");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.Position, "Study Name");
        //NEW
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.Hons, "hons");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.DateTitle_dayMonthYear,
                "Acchievement Date Day");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.DateTitle_MonthYear,
                "Acchievement Date Month");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.DateTitle_year, "Acchievement Date Year");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.Entity1_entityName, "Entity 1");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.Entity2_entityName, "Entity 2");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.Entity3_entityName, "Entity 3");
        //NEW
        //new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies., "Complete Institution (e1+e2+e3)");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.PlaceTitle_city, "City");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.PlaceTitle_regionName, "Region");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.PlaceTitle_regionCode, "Region Code");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.PlaceTitle_countryName, "Country");
        new_unit.map.put(CVItemExtracted.AccreditedUniversityStudies.PlaceTitle_countryCode, "Country Code");
        new_unit.createMapIndex();
        try {
            new_unit.createNewCsv();
        } catch (IOException ex) {
            ProjectLogger.LOGGER.error(ex.getMessage());
            return;
        }

        units.add(new_unit);

        org.dom4j.Element root = document.getRootElement();
        boolean bLock = false;
        int count = 0;

        bLock = false;
        for (Iterator i = root.elementIterator("blockinfo"); i.hasNext();) {
            org.dom4j.Element elInfoBlock = (org.dom4j.Element) i.next();
            //            <blockinfo id_annotationrecollecting="default" id_entity="3626" URL="file:/...">
            //                <ProfessionalActivityNoCurrent>
            //                <Content>Research Fellow, University of Leicester (1984</Content>
            //                <DateInit_year>1984</DateInit_year>
            //                <Pattern>ProfessionalActivityPattern1</Pattern>
            //                <Entity_entityName>University of Leicester</Entity_entityName>
            //                <Title_name>Research Fellow</Title_name>
            //                </ProfessionalActivityNoCurrent>
            //                ...
            //            </blockinfo>
            String id_entity = elInfoBlock.attributeValue(DataExchangeLiterals.MIDDLE_ELEMENT_XML_ID_ENTITY_ATT);

            int countElements = 0;
            for (Object oCVNItem : elInfoBlock.elements()) {
                countElements++;
                org.dom4j.Element elCVNItem = (org.dom4j.Element) oCVNItem;
                String className = elCVNItem.getName();

                ViewsExporterUnit unit_ref = null;

                for (ViewsExporterUnit unit : units) {
                    if (className.startsWith(unit.tag)) {
                        unit_ref = unit;
                        break;
                    }
                }

                if (unit_ref == null) {
                    ProjectLogger.LOGGER.info(className + " has not views exporter unit");
                } else {
                    LinkedHashMap<String, String> values = new LinkedHashMap<String, String>();
                    for (Object oCVNItemField : elCVNItem.elements()) {
                        org.dom4j.Element elCVNItemField = (org.dom4j.Element) oCVNItemField;
                        String methodName = elCVNItemField.getName();
                        String value = "";
                        String value_name = "";

                        if (methodName.equals("Pattern")) {
                            if (debug_mode)
                                value = elCVNItemField.getText();
                        } else if (methodName.equals("Content")) {
                            if (debug_mode)
                                value = elCVNItemField.getText();
                        } else if ((methodName.equals("Domain"))) {
                            if (debug_mode)
                                value = elCVNItemField.getText();
                        } else {
                            value_name = elCVNItemField.getName();
                            value = elCVNItemField.getText();
                            values.put(value_name, value);
                        }
                    }

                    if (elCVNItem.attributeValue("action_mode").equals("")
                            || elCVNItem.attributeValue("action_mode").equals("add")) {
                        try {
                            String[] line = unit_ref.createNewLine(id_entity, values);
                            unit_ref.AddCsv(line);
                        } catch (Exception ex) {
                            ProjectLogger.LOGGER.error(ex.getMessage());
                        }
                    } else if (elCVNItem.attributeValue("action_mode").equals("overwrite")) {
                        try {
                            String[] line = unit_ref.createNewLine(id_entity, values);
                            unit_ref.UpdateCsv(id_entity, line);
                        } catch (Exception ex) {
                            ProjectLogger.LOGGER.error(ex.getMessage());
                        }
                    }
                }
            }
        }

        int max_r = 0;
        int max_c = 0;

        for (ViewsExporterUnit unit : units) {
            if (max_c < unit.map_index.size() + 1)
                max_c = unit.map_index.size() + 1;

            if (max_r < unit.lines)
                max_r = unit.lines;

            try {
                unit.closeCsv();
            } catch (IOException ex) {
                ProjectLogger.LOGGER.error(ex.getMessage());
            }
        }

        if (create_spreadsheet) {
            //Create open document spread sheet
            TableModel model = new DefaultTableModel(max_r, max_c);

            // Save the data to an ODS file and open it.        
            final File file = new File(dest.getAbsolutePath() + File.separator + spreadsheet_w_all_data_file);

            try {
                SpreadSheet.createEmpty(model).saveAs(file);

                SpreadSheet spread_sheet = SpreadSheet.createFromFile(file);

                int i = 0;
                for (ViewsExporterUnit unit : units) {
                    Sheet sheet = null;
                    if (i > 0) {
                        sheet = spread_sheet.addSheet(i, unit.tag);
                    } else {
                        sheet = spread_sheet.getSheet(i);
                        sheet.setName(unit.tag);
                    }

                    sheet.setRowCount(unit.lines);
                    sheet.setColumnCount(unit.map_index.size() + 1);

                    InputStreamReader fw1 = new InputStreamReader(new FileInputStream(unit.filepath), "UTF-8");
                    CSVReader reader = new CSVReader(fw1, csv_separator);

                    String[] line = null;
                    int r = 0;

                    while ((line = reader.readNext()) != null) {
                        for (int c = 0; c < line.length; c++) {
                            sheet.setValueAt(line[c], c, r);
                        }
                        r++;
                        //System.out.println(r);
                    }
                    i++;
                }

                spread_sheet.saveAs(file);

            } catch (FileNotFoundException ex) {
                ProjectLogger.LOGGER.error(ex.getMessage());
            } catch (IOException ex) {
                ProjectLogger.LOGGER.error(ex.getMessage());
            } catch (Exception ex) {
                ProjectLogger.LOGGER.error(ex.getMessage());
            } finally {

            }
        }

    }

}