org.wandora.application.tools.extractors.excel.ExcelTopicNameExtractor.java Source code

Java tutorial

Introduction

Here is the source code for org.wandora.application.tools.extractors.excel.ExcelTopicNameExtractor.java

Source

/*
 * WANDORA
 * Knowledge Extraction, Management, and Publishing Application
 * http://wandora.org
 * 
 * Copyright (C) 2004-2016 Wandora Team
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 */

package org.wandora.application.tools.extractors.excel;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.wandora.application.Wandora;
import org.wandora.application.tools.GenericOptionsDialog;
import org.wandora.topicmap.Locator;
import org.wandora.topicmap.TMBox;
import org.wandora.topicmap.Topic;
import org.wandora.topicmap.TopicMap;
import org.wandora.topicmap.TopicMapException;
import org.wandora.topicmap.XTMPSI;

/**
 *
 * @author akivela
 */

public class ExcelTopicNameExtractor extends AbstractExcelExtractor {

    public static boolean FIRST_ROW_CONTAINS_LANGUAGES = true;
    public static boolean CREATE_MISSING_LANGUAGE_TOPICS = true;

    public static boolean ADD_DISPLAY_TO_SCOPE = true;
    public static boolean ADD_SORT_TO_SCOPE = false;

    private HashMap<String, String> languagesPerColumn = new HashMap();

    @Override
    public String getName() {
        return "Excel topic name extractor";
    }

    @Override
    public String getDescription() {
        return "Excel topic name extractor extracts variant names for first column topics.";
    }

    @Override
    public void processWorkbook(HSSFWorkbook workbook, TopicMap topicMap) {
        int numberOfSheets = workbook.getNumberOfSheets();
        for (int i = 0; i < numberOfSheets && !forceStop(); i++) {
            HSSFSheet sheet = workbook.getSheetAt(i);
            processSheet(sheet, topicMap);
        }
    }

    @Override
    public void processWorkbook(XSSFWorkbook workbook, TopicMap topicMap) {
        int numberOfSheets = workbook.getNumberOfSheets();
        for (int i = 0; i < numberOfSheets && !forceStop(); i++) {
            XSSFSheet sheet = workbook.getSheetAt(i);
            processSheet(sheet, topicMap);
        }
    }

    public void processSheet(HSSFSheet sheet, TopicMap tm) {
        Iterator<Row> rowIterator = sheet.iterator();
        boolean isFirst = true;
        languagesPerColumn = new HashMap();
        while (rowIterator.hasNext() && !forceStop()) {
            Row row = rowIterator.next();
            if (isFirst && FIRST_ROW_CONTAINS_LANGUAGES) {
                processRowAsLanguages(row, tm);
                isFirst = false;
            } else {
                processRow(row, tm);
            }
        }
    }

    public void processSheet(XSSFSheet sheet, TopicMap tm) {
        Iterator<Row> rowIterator = sheet.iterator();
        boolean isFirst = true;
        languagesPerColumn = new HashMap();
        while (rowIterator.hasNext() && !forceStop()) {
            Row row = rowIterator.next();
            if (isFirst && FIRST_ROW_CONTAINS_LANGUAGES) {
                processRowAsLanguages(row, tm);
                isFirst = false;
            } else {
                processRow(row, tm);
            }
        }
    }

    public void processRow(Row row, TopicMap tm) {
        try {
            Iterator<Cell> cellIterator = row.cellIterator();
            Topic topic = null;
            Cell firstCell = row.getCell(0);
            if (getCellValueAsString(firstCell) != null) {
                topic = getCellTopic(firstCell, tm);
            }
            if (topic != null) {
                while (cellIterator.hasNext() && !forceStop()) {
                    Cell cell = cellIterator.next();
                    if (cell.getColumnIndex() != 0) {
                        String name = getCellValueAsString(cell);
                        if (name != null) {
                            String langSI = languagesPerColumn.get(Integer.toString(cell.getColumnIndex()));
                            Topic lang = tm.getTopic(langSI);
                            if (lang == null)
                                lang = getDefaultLanguageTopic(cell, tm);
                            HashSet scope = new LinkedHashSet();
                            scope.add(lang);
                            if (ADD_DISPLAY_TO_SCOPE)
                                scope.add(tm.getTopic(XTMPSI.DISPLAY));
                            if (ADD_SORT_TO_SCOPE)
                                scope.add(tm.getTopic(XTMPSI.SORT));
                            topic.setVariant(scope, name);
                        }
                    }
                }
            }
        } catch (TopicMapException ex) {
            log(ex);
        } catch (Exception ex) {
            log(ex);
        }
    }

    public void processRowAsLanguages(Row row, TopicMap tm) {
        Iterator<Cell> cellIterator = row.cellIterator();
        while (cellIterator.hasNext()) {
            try {
                String langSI = null;
                Cell cell = cellIterator.next();
                String lang = getCellValueAsString(cell);
                if (lang != null) {
                    Topic langTopic = tm.getTopicWithBaseName(lang);
                    if (langTopic != null)
                        langSI = langTopic.getOneSubjectIdentifier().toExternalForm();
                    else
                        langSI = XTMPSI.getLang(lang);
                    langTopic = tm.getTopic(new Locator(langSI));
                    languagesPerColumn.put(Integer.toString(cell.getColumnIndex()), langSI);

                    if (langTopic == null && CREATE_MISSING_LANGUAGE_TOPICS) {
                        langTopic = tm.createTopic();
                        langTopic.addSubjectIdentifier(new Locator(langSI));
                        Topic langTypeTopic = tm.getTopic(XTMPSI.LANGUAGE);
                        if (langTypeTopic != null) {
                            langTopic.addType(langTypeTopic);
                        }
                    }
                }
            } catch (Exception ex) {
                log(ex);
            }
        }
    }

    // -------------------------------------------------------------------------

    public static String DEFAULT_LANGUAGE_SI = EXCEL_SI_PREFIX + "/language";

    public Topic getDefaultLanguageTopic(Cell cell, TopicMap tm) {
        if (cell != null && tm != null) {
            int i = cell.getColumnIndex();
            Topic typeTopic = getOrCreateTopic(tm, DEFAULT_LANGUAGE_SI + "/" + i, "Excel language " + i);
            return typeTopic;
        }
        return null;
    }

    // -------------------------------------------------------------------------
    // ---------------------------------------------------------- CONFIGURE ----
    // -------------------------------------------------------------------------

    @Override
    public boolean isConfigurable() {
        return true;
    }

    @Override
    public void configure(Wandora admin, org.wandora.utils.Options options, String prefix)
            throws TopicMapException {
        Wandora wandora = Wandora.getWandora();

        GenericOptionsDialog god = new GenericOptionsDialog(wandora, "Excel topic name extractor options",
                "Excel topic name extractor options", true,
                new String[][] {
                        new String[] { "First row contains languages?", "boolean",
                                (FIRST_ROW_CONTAINS_LANGUAGES ? "true" : "false"), null },
                        new String[] { "Create display names?", "boolean",
                                (ADD_DISPLAY_TO_SCOPE ? "true" : "false"), null },
                        new String[] { "Create sort names?", "boolean", (ADD_SORT_TO_SCOPE ? "true" : "false"),
                                null }, },
                wandora);
        god.setVisible(true);
        if (god.wasCancelled())
            return;

        Map<String, String> values = god.getValues();

        FIRST_ROW_CONTAINS_LANGUAGES = ("true".equals(values.get("First row contains languages?")) ? true : false);
        ADD_DISPLAY_TO_SCOPE = ("true".equals(values.get("Create display names?")) ? true : false);
        ADD_SORT_TO_SCOPE = ("true".equals(values.get("Create sort names?")) ? true : false);

    }

}