com.stam.excellatin.ExcelLatin.java Source code

Java tutorial

Introduction

Here is the source code for com.stam.excellatin.ExcelLatin.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.stam.excellatin;

import com.ibm.icu.text.Transliterator;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
 *
 * @author StamaterisG
 */
public class ExcelLatin {

    private static final List<String> validOptions = new ArrayList<>();

    static {
        validOptions.add("-L");
        validOptions.add("-G");
        validOptions.add("-d");
    }

    public static void main(String[] args) {
        List<String> options = new ArrayList<>();
        int startIndex = 0;
        for (String arg : args) {
            if (validOptions.contains(arg)) {
                options.add(arg);
                startIndex++;
            }
        }

        if (args[0].equals("-h") || args.length < 3) {
            System.out.println("usage: ExcelLatin [options] filenameIn filenameOut columnNames...");
            System.out.println("options:");
            System.out.println("\t-L\tto Latin (default)");
            System.out.println("\t-G\tto Greek");
            System.out.println("\t-d\tdon't deaccent");
            System.out.println("\t-h\thelp");
        } else {
            boolean greekToLatin = false;
            boolean latinToGreek = false;
            Transliterator transliterator = null;
            if ((!options.contains("-L") && !options.contains("-G")) || options.contains("-L")) {
                transliterator = Transliterator.getInstance("Greek-Latin/UNGEGN");
                System.out.println("\nTransliterating Greek to Latin");
                greekToLatin = true;
            } else if (options.contains("-G")) {
                transliterator = Transliterator.getInstance("Latin-Greek/UNGEGN");
                System.out.println("\nTransliterating Latin to Greek");
                latinToGreek = true;
            }

            if (transliterator == null) {
                System.out.println("Not a valid option for the transliteration language");
                return;
            }

            boolean deAccent = true;
            if (options.contains("-d")) {
                deAccent = false;
                System.out.println("Will not deaccent");
            }

            String fileNameIn = args[startIndex];
            String fileNameOut = args[startIndex + 1];
            List<String> columnNames = new ArrayList<>();
            System.out.println("\nColumns to transliterate\n---------------------------");
            for (int i = startIndex + 2; i < args.length; i++) {
                columnNames.add(args[i]);
                System.out.println(args[i]);
            }
            System.out.println("\n");

            try {
                File file = new File(fileNameIn);
                if (!file.exists()) {
                    System.out.println("The file " + fileNameIn + " was not found");
                    return;
                }

                Map<String, String> mapTransformations = new HashMap<>();
                Scanner sc = new Scanner(new FileReader("map.txt"));
                while (sc.hasNextLine()) {
                    String greekEntry = sc.next();
                    String latinEntry = sc.next();

                    if (greekToLatin) {
                        mapTransformations.put(greekEntry, latinEntry);
                    } else if (latinToGreek) {
                        mapTransformations.put(latinEntry, greekEntry);
                    }
                }

                DataFormatter formatter = new DataFormatter();
                Workbook wb = WorkbookFactory.create(file);

                Workbook newWb = null;
                if (wb instanceof HSSFWorkbook) {
                    newWb = new HSSFWorkbook();
                } else if (wb instanceof XSSFWorkbook) {
                    newWb = new XSSFWorkbook();
                }
                FileOutputStream fileOut = new FileOutputStream(fileNameOut);
                if (newWb != null) {
                    Sheet sheetOut = newWb.createSheet();

                    Sheet sheet = wb.getSheetAt(0);

                    List<Integer> idxs = new ArrayList<>();

                    Row row = sheet.getRow(0);
                    for (Cell cell : row) {
                        String cellVal = formatter.formatCellValue(cell);
                        if (cellVal == null || cellVal.trim().equals("")) {
                            break;
                        }

                        if (columnNames.contains(cell.getStringCellValue())) {
                            idxs.add(cell.getColumnIndex());
                        }
                    }

                    for (Row rowIn : sheet) {
                        Row rowOut = sheetOut.createRow(rowIn.getRowNum());
                        if (rowIn.getRowNum() == 0) {
                            for (Cell cell : rowIn) {
                                cell.setCellType(Cell.CELL_TYPE_STRING);
                                Cell cellOut = rowOut.createCell(cell.getColumnIndex());
                                cellOut.setCellValue(cell.getStringCellValue());
                            }
                        } else {
                            for (Cell cell : rowIn) {
                                cell.setCellType(Cell.CELL_TYPE_STRING);
                                String cellVal = formatter.formatCellValue(cell);
                                String cellNewVal = cellVal;
                                if (idxs.contains(cell.getColumnIndex()) && cellVal != null) {
                                    if (mapTransformations.containsKey(cellVal)) {
                                        cellNewVal = mapTransformations.get(cellVal);
                                    } else {
                                        if (deAccent) {
                                            cellNewVal = deAccent(transliterator.transform(cellVal));
                                        } else {
                                            cellNewVal = transliterator.transform(cellVal);
                                        }
                                    }
                                }
                                Cell cellOut = rowOut.createCell(cell.getColumnIndex());
                                cellOut.setCellValue(cellNewVal);
                            }
                        }
                    }

                    System.out.println("Finished!");

                    newWb.write(fileOut);
                    fileOut.close();
                }
            } catch (IOException | InvalidFormatException ex) {
                Logger.getLogger(ExcelLatin.class.toString()).log(Level.SEVERE, null, ex);
            }
        }

    }

    private final static Pattern diactiticalMarksPattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    private static String deAccent(String str) {
        String nfdNormalizedString = Normalizer.normalize(str, Normalizer.Form.NFD);
        return diactiticalMarksPattern.matcher(nfdNormalizedString).replaceAll("");
    }
}