com.maxl.java.aips2sqlite.PseudoExpertInfo.java Source code

Java tutorial

Introduction

Here is the source code for com.maxl.java.aips2sqlite.PseudoExpertInfo.java

Source

/*
Copyright (c) 2014 Max Lungarella
    
This file is part of Aips2SQLite.
    
Aips2SQLite is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
    
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
    
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package com.maxl.java.aips2sqlite;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Entities.EscapeMode;

/**
 * Converts pseudo "Fachinfo" Word documents (.docx) found in
 * {@link #FILE_PSEUDO_INFO_DIR} into HTML entries and hands them to the
 * {@link SqlDatabase} passed to the constructor.
 * <p>
 * An instance keeps the state of the document currently being processed in its
 * fields, which are reset at the start of every {@link #extractInfo} call.
 */
public class PseudoExpertInfo {

    // This is the location of the directory with the pseudo "Fachinfos"
    private static final String FILE_PSEUDO_INFO_DIR = "./input/pseudo/";

    // An EAN code is recognised as 13 digits at the very start of a paragraph
    private static final Pattern EAN_CODE_PATTERN = Pattern.compile("^[0-9]{13}");

    // Sentinel appended after the last section title found in a document
    private static final String END_OF_TITLES = "nil";

    private SqlDatabase mSqlDB = null;

    private String mLanguage = "";

    // Accumulated plain text per section (same indexing as mSectionTitles)
    private ArrayList<String> mSectionContent;

    // Section titles in document order, terminated by END_OF_TITLES
    private ArrayList<String> mSectionTitles;

    // Bar code snippets, collected for a "Barcodes" chapter that is currently not rendered
    private ArrayList<String> mBarCodes = new ArrayList<String>();

    private Map<String, Product> m_map_products = null;
    // Packages string used for "shopping" purposes (will contain ean code, pharma codes, prices etc.)
    private List<String> m_list_of_packages = null;

    private MedicalInformations.MedicalInformation mMedi;

    private String mEanCodes_str = "";

    private String mSectionIds_str = "";

    private String mSectionTitles_str = "";

    // Package section string
    private String m_pack_info_str = "";

    private int mCustomerId;

    /**
     * @param sqlDB        database that receives the generated entries
     * @param language     language code; "de" and "fr" enable a check of the first section title
     * @param map_products products keyed by EAN code, used to enrich package lines (may be null)
     */
    public PseudoExpertInfo(SqlDatabase sqlDB, String language, Map<String, Product> map_products) {
        mSqlDB = sqlDB;
        mLanguage = language;
        m_map_products = map_products;
        m_list_of_packages = new ArrayList<String>();
        // This sets the customer id (as of yet unused)
        mCustomerId = 2;
    }

    /**
     * Placeholder, currently does nothing.
     */
    public void download() {
        // TODO: Live connection to OneDrive
    }

    /**
     * Processes every .docx file found (recursively) below
     * {@link #FILE_PSEUDO_INFO_DIR} with {@link #extractInfo}.
     *
     * @return number of documents for which {@link #extractInfo} returned true;
     *         0 if the directory does not exist or a file could not be opened
     */
    public int process() {
        File dir = new File(FILE_PSEUDO_INFO_DIR);
        if (!dir.isDirectory()) {
            System.out.println("Directory with pseudo FIs not found!");
            return 0;
        }
        Collection<File> files = FileUtils.listFiles(dir, FileFilterUtils.suffixFileFilter(".docx"),
                TrueFileFilter.INSTANCE);
        if (files == null)
            return 0;
        System.out.println("\nProcessing total of " + files.size() + " pseudo Fachinfos...");
        // 1-based running index; only advanced for successfully extracted documents
        int idxPseudo = 1;
        try {
            for (File pseudo : files) {
                if (!pseudo.isFile())
                    continue;
                FileInputStream pseudoInfoFile = new FileInputStream(pseudo.getAbsoluteFile());
                try {
                    if (extractInfo(idxPseudo, pseudoInfoFile))
                        idxPseudo++;
                } finally {
                    // Release the file handle whether or not extraction succeeded
                    closeQuietly(pseudoInfoFile);
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return 0;
        }
        return idxPseudo - 1;
    }

    /**
     * Closes the stream and reports (but does not propagate) a failure to do so.
     */
    private static void closeQuietly(FileInputStream stream) {
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts all the important information from the pseudo "Fachinfo" file,
     * builds the HTML representation and adds the entry to the database.
     * <p>
     * The first paragraph is taken as the title. The paragraph preceding the tag
     * "Medizinprodukt" / "Dispositif m&eacute;dical" is taken as the author. Everything
     * after the tag is split into sections at the paragraphs found by the bold-run
     * pre-scan.
     *
     * @param idx              running number, only used for the progress output
     * @param pseudo_info_file stream of the .docx file; this method does not close it itself
     * @return true if an entry was added to the database; false if the first section
     *         title does not fit the language ("de"/"fr") or the document could not be read
     */
    public boolean extractInfo(int idx, FileInputStream pseudo_info_file) {
        mMedi = new MedicalInformations.MedicalInformation();

        mSectionContent = new ArrayList<String>();
        mSectionTitles = new ArrayList<String>();
        mBarCodes = new ArrayList<String>();
        m_list_of_packages = new ArrayList<String>();

        String mediTitle = "";
        String mediAuthor = "";
        String mediPseudoTag = "";
        String mediHtmlContent = "";

        StringBuilder content = new StringBuilder();

        try {
            // Read in docx file
            XWPFDocument docx = new XWPFDocument(pseudo_info_file);

            // Pre-process document to extract paragraph titles
            collectSectionTitles(docx);

            // The sentinel guarantees that index 0 exists even without any title
            if (mLanguage.equals("de") && !mSectionTitles.get(0).equals("Zusammensetzung"))
                return false;
            if (mLanguage.equals("fr") && !mSectionTitles.get(0).equals("Composition"))
                return false;

            // Second pass over all paragraphs
            Iterator<XWPFParagraph> para = docx.getParagraphsIterator();

            // Init list for section content
            for (int i = 0; i < mSectionTitles.size(); ++i)
                mSectionContent.add("");

            // Get title
            if (para.hasNext())
                mediTitle = para.next().getParagraphText();
            // Get author while using "Medizinprodukt" as tag
            // NOTE(review): the original comment stated that no pseudo FI is produced when the
            // tag is missing; the code nevertheless continues and stores an entry without sections.
            String prevParaText = "";
            while (para.hasNext()) {
                String paraText = para.next().getParagraphText();
                if (paraText.equals("Medizinprodukt") || paraText.equals("Dispositif m\u00e9dical")) {
                    mediPseudoTag = paraText;
                    mediAuthor = prevParaText;
                    break;
                }
                prevParaText = paraText;
            }

            // Get section titles + sections + ean codes
            boolean isSectionPackungen = false;
            int numSection = 0;
            StringBuilder eanCodes = new StringBuilder();
            // section1 is reserved for the MonTitle
            StringBuilder sectionIds = new StringBuilder("section1,");
            // NOTE(review): the title is followed by "," while section titles are followed
            // by ";" - kept as in the original, confirm the mixed separators are intended.
            StringBuilder sectionTitles = new StringBuilder().append(mediTitle).append(",");
            StringBuilder packInfo = new StringBuilder();

            while (para.hasNext()) {
                String paraText = para.next().getParagraphText();
                // Bounds check first: numSection equals the list size once the sentinel was matched
                boolean isTitle = numSection < mSectionTitles.size()
                        && paraText.equals(mSectionTitles.get(numSection));
                if (isTitle) {
                    // ->> Get section title
                    numSection++;
                    // Section "Packungen" is special
                    isSectionPackungen = paraText.equals("Packungen") || paraText.equals("Pr\u00e9sentation");
                    // Close previous div
                    if (numSection > 1)
                        content.append("</div>");
                    // Create html
                    String sectionId = "section" + (numSection + 1);
                    content.append("<div class=\"paragraph\" id=\"" + sectionId + "\">");
                    content.append("<div class=\"absTitle\">" + paraText + "</div>");
                    // Generate section id string
                    sectionIds.append(sectionId).append(",");
                    // Generate titles string
                    sectionTitles.append(paraText).append(";");
                } else if (numSection > 0) {
                    // ->> Get section content
                    String s = mSectionContent.get(numSection - 1);
                    mSectionContent.set(numSection - 1, s + paraText + " ");
                    // Create html
                    content.append("<p class=\"spacing1\">" + paraText + "</p>");
                    // Extract EAN codes
                    Matcher matcher = EAN_CODE_PATTERN.matcher(paraText);
                    while (matcher.find()) {
                        String eanCode = matcher.group();
                        eanCodes.append(eanCode).append(", ");
                        m_list_of_packages.add(buildPackageEntry(mediTitle, eanCode));
                        // Generate bar codes
                        BarCode bc = new BarCode();
                        String barcodeImg64 = bc.encode(eanCode);
                        mBarCodes.add("<p class=\"spacing1\">" + barcodeImg64 + "</p>");
                        content.append(barcodeImg64);
                    }
                    // Generate section Packungen for search result
                    if (isSectionPackungen)
                        packInfo.append(paraText).append("\n");
                }
                // Paragraphs in front of the first section title belong to no section and are skipped
            }

            // Remove trailing ", " from the EAN codes
            if (eanCodes.length() > 0)
                eanCodes.setLength(eanCodes.length() - 2);
            // Remove trailing \n from the package info
            if (packInfo.length() > 0)
                packInfo.setLength(packInfo.length() - 1);

            mEanCodes_str = eanCodes.toString();
            mSectionIds_str = sectionIds.toString();
            mSectionTitles_str = sectionTitles.toString();
            m_pack_info_str = packInfo.toString();

            // Set title, author
            mMedi.setTitle(mediTitle);
            mMedi.setAuthHolder(mediAuthor);
            mMedi.setAtcCode("PSEUDO");
            mMedi.setSubstances(mediTitle);

            System.out.println(idx + " - " + mediTitle + ": " + mEanCodes_str);

            // Close previous div + monographie div. The wrapper below appends one more closing
            // tag; NOTE(review): the surplus tag is presumably discarded by the Jsoup parser.
            content.append("</div></div>");
            String title = "<div class=\"MonTitle\" id=\"section1\">" + mediTitle + "</div>";
            String author = "<div class=\"ownerCompany\"><div style=\"text-align: right;\">" + mediAuthor
                    + "</div></div>";
            // Set "Medizinprodukt" label
            String pseudo = "<p class=\"spacing1\">" + mediPseudoTag + "</p>";
            // Set medi content
            mediHtmlContent = "<html><head></head><body><div id=\"monographie\">" + title + author + pseudo
                    + content.toString() + "</div></body></html>";

            // Generate clean html file
            Document doc = Jsoup.parse(mediHtmlContent);
            doc.outputSettings().escapeMode(EscapeMode.xhtml);
            doc.outputSettings().charset("UTF-8");
            doc.outputSettings().prettyPrint(true);
            doc.outputSettings().indentAmount(1);
            mediHtmlContent = doc.html();

            // Set html content
            mMedi.setContent(mediHtmlContent);

            // Add to DB
            addToDB();

            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Fills {@code mSectionTitles} with the text of every paragraph that contains a
     * bold run, starting at the first chapter title ("Zusammensetzung" / "Composition"),
     * and appends the sentinel. Each paragraph contributes at most one entry, even if
     * its text is split into several bold runs.
     */
    private void collectSectionTitles(XWPFDocument docx) {
        boolean goodToGo = false;
        Iterator<XWPFParagraph> para = docx.getParagraphsIterator();
        while (para.hasNext()) {
            XWPFParagraph paragraph = para.next();
            if (!hasBoldRun(paragraph))
                continue;
            String pText = paragraph.getText();
            // These are the first chapter titles (DE and FR)
            if (pText.equals("Zusammensetzung") || pText.equals("Composition"))
                goodToGo = true;
            if (goodToGo)
                mSectionTitles.add(pText);
        }
        // Add "nil" at the end
        mSectionTitles.add(END_OF_TITLES);
    }

    /**
     * Tells whether at least one run of the paragraph is bold (bold identifies a section title).
     */
    private static boolean hasBoldRun(XWPFParagraph paragraph) {
        List<XWPFRun> runs = paragraph.getRuns();
        for (XWPFRun r : runs) {
            if (r.isBold())
                return true;
        }
        return false;
    }

    /**
     * Builds the '|'-separated package line for one EAN code. Price, size, unit and
     * category fields stay empty when the product map has no entry for the code.
     */
    private String buildPackageEntry(String mediTitle, String eanCode) {
        String pup = "";
        String efp = "";
        String fep = "";
        String fap = "";
        String vat = "";
        String size = "";
        String units = "";
        String swissmedic_cat = "";
        String pharma_code = "";
        int visible = 0xff;
        int has_free_samples = 0x00; // by default no free samples
        // Extract fep and fap pricing information
        // FAP = Fabrikabgabepreis = EFP?
        // FEP = Fachhandelseinkaufspreis
        // EFP = FAP < FEP < PUP
        if (m_map_products != null && m_map_products.containsKey(eanCode)) {
            Product product = m_map_products.get(eanCode);
            if (product.efp > 0.0f)
                efp = String.format("CHF %.2f", product.efp);
            if (product.pp > 0.0f)
                pup = String.format("CHF %.2f", product.pp);
            if (product.fap > 0.0f)
                fap = String.format("CHF %.2f", product.fap);
            if (product.fep > 0.0f)
                fep = String.format("CHF %.2f", product.fep);
            if (product.vat > 0.0f)
                vat = String.format("%.2f", product.vat);
            if (product.size != null && !product.size.isEmpty())
                size = product.size;
            if (product.units != null && product.units.length > 0)
                units = product.units[0];
            if (product.swissmedic_cat != null && !product.swissmedic_cat.isEmpty())
                swissmedic_cat = product.swissmedic_cat;
            if (product.pharmacode != null && !product.pharmacode.isEmpty())
                pharma_code = product.pharmacode;
            visible = product.visible;
            has_free_samples = product.free_sample;
        }
        return mediTitle.toUpperCase() + ", " + units + ", " + size + "|" + size
                + "|" + units + "|" + efp + "|" + pup + "|" + fap + "|" + fep + "|" + vat + "|"
                + swissmedic_cat + ",,|" + eanCode + "|" + pharma_code + "|" + visible + "|"
                + has_free_samples + "\n";
    }

    /**
     * Hands the data gathered by {@link #extractInfo} to the database. The EAN code
     * string is passed for two of the arguments, as in the original call.
     */
    private void addToDB() {
        // orggen_str = "P" (=pseudo)
        List<String> pseudoTags = new ArrayList<String>();
        pseudoTags.add("PSEUDO");
        pseudoTags.add("PSEUDO");

        StringBuilder packages = new StringBuilder();
        for (String s : m_list_of_packages)
            packages.append(s);

        mSqlDB.addExpertDB(mMedi, packages.toString(), mEanCodes_str, mSectionIds_str, mSectionTitles_str,
                mEanCodes_str, "", m_pack_info_str, "", mCustomerId, pseudoTags, "");
    }
}