br.ufmt.periscope.importer.impl.ESPACENETPatentImporter.java Source code

Introduction

Here is the source code for br.ufmt.periscope.importer.impl.ESPACENETPatentImporter.java
Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package br.ufmt.periscope.importer.impl;

import br.ufmt.periscope.enumerated.ClassificationType;
import br.ufmt.periscope.importer.PatentImporter;
import br.ufmt.periscope.model.Applicant;
import br.ufmt.periscope.model.Classification;
import br.ufmt.periscope.model.Inventor;
import br.ufmt.periscope.model.Patent;
import br.ufmt.periscope.model.Priority;
import br.ufmt.periscope.repository.CountryRepository;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.inject.Inject;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;

/**
 * {@link PatentImporter} for result lists exported from Espacenet.
 *
 * <p>Two layouts are understood. A stream whose first line starts with
 * {@code "Title} is read as the CSV export; anything else is opened as a
 * binary Excel (HSSF) workbook. Both layouts share the same column order:
 * <ol start="0">
 *   <li>title</li>
 *   <li>publication number</li>
 *   <li>publication date ({@code yyyy-MM-dd})</li>
 *   <li>inventors, each optionally followed by a bracketed country acronym</li>
 *   <li>applicants, same format as inventors</li>
 *   <li>classifications, stored with {@link ClassificationType#IC}</li>
 *   <li>cooperative patent classification (ignored)</li>
 *   <li>application number</li>
 *   <li>application date ({@code yyyyMMdd})</li>
 *   <li>priorities: two-letter country acronym + number, then a
 *       {@code yyyyMMdd} date</li>
 * </ol>
 * Multi-valued cells are separated by line feeds in the XLS layout and by
 * {@code ;} in the CSV layout.
 *
 * <p>An instance keeps per-import iteration state and two
 * {@link SimpleDateFormat} objects, so it must not be used by several threads
 * at once.
 *
 * @author adrian
 */
public class ESPACENETPatentImporter implements PatentImporter {

    private static final Logger LOGGER = Logger.getLogger(ESPACENETPatentImporter.class.getName());

    private static final String TYPE_XLS = "xls";
    private static final String TYPE_CSV = "csv";

    /**
     * Rows that precede the first patent row in the XLS export: logo, number
     * of results, search title, number of publications shown, column names.
     */
    private static final int XLS_HEADER_ROWS = 5;

    /** Highest column index this importer reads; later columns are ignored. */
    private static final int LAST_COLUMN = 9;

    private @Inject CountryRepository countryRepository;
    private String lang = "EN";
    private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    private final SimpleDateFormat sdf2 = new SimpleDateFormat("yyyyMMdd");

    /** Reader over the raw bytes; drives iteration in CSV mode. */
    private BufferedReader br;
    /** Row cursor positioned after the header rows; drives iteration in XLS mode. */
    private Iterator<Row> rowIterator;
    /** CSV line that the next call to {@link #next()} will parse, or {@code null}. */
    private String line;
    /** {@code "xls"}, {@code "csv"}, or {@code null} until initialisation succeeds. */
    private String fileType;

    /**
     * Buffers the stream, detects its layout and positions the importer on the
     * first patent.
     *
     * @param is stream holding an Espacenet CSV or XLS export; it is read to
     *           the end but not closed
     * @return {@code true} when the stream was recognised and is ready for
     *         iteration; {@code false} when it is empty or reading/opening it
     *         failed with an {@link IOException}
     */
    @Override
    public boolean initWithStream(InputStream is) {
        // Drop whatever a previous import left behind so a failed
        // initialisation can never be mistaken for a usable one.
        fileType = null;
        rowIterator = null;
        line = null;
        br = null;
        try {
            // The bytes are needed twice: once as text to sniff the format and
            // once as binary input for the workbook parser.
            byte[] data = readFully(is);

            // NOTE(review): text is decoded with the platform default charset,
            // exactly as before - confirm the encoding of the CSV export.
            br = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(data)));

            String firstLine = br.readLine();
            if (firstLine == null) {
                LOGGER.log(Level.SEVERE, "Empty input stream: neither an Espacenet CSV nor XLS export");
                return false;
            }

            // The CSV header line always starts with "Title".
            if (firstLine.matches("\"Title.*")) {
                fileType = TYPE_CSV;
                nextLine(); // header already consumed; load the first data line
            } else {
                openWorkbook(new ByteArrayInputStream(data));
                fileType = TYPE_XLS;
            }
        } catch (IOException ex) {
            // Reaching this point means the file does not follow the expected layout.
            LOGGER.log(Level.SEVERE, null, ex);
            return false;
        }
        return true;
    }

    /**
     * Copies the whole stream into memory so it can be re-read several times.
     * Approach taken from
     * http://stackoverflow.com/questions/5923817/how-to-clone-an-inputstream
     *
     * @param input stream to drain
     * @return every byte the stream produced
     * @throws IOException if reading fails
     */
    private static byte[] readFully(InputStream input) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[1024];
        int read;
        while ((read = input.read(chunk)) > -1) {
            buffer.write(chunk, 0, read);
        }
        return buffer.toByteArray();
    }

    /**
     * Opens the first sheet of the workbook and skips its header rows.
     *
     * @param content binary workbook data
     * @throws IOException if the data cannot be read as an HSSF workbook
     */
    private void openWorkbook(InputStream content) throws IOException {
        HSSFWorkbook workbook = new HSSFWorkbook(content);
        HSSFSheet sheet = workbook.getSheetAt(0);
        Iterator<Row> rows = sheet.iterator();
        // A sheet shorter than the header simply yields no patents.
        for (int skipped = 0; skipped < XLS_HEADER_ROWS && rows.hasNext(); skipped++) {
            rows.next();
        }
        rowIterator = rows;
    }

    /**
     * @return {@code true} when another patent can be read: a remaining row in
     *         XLS mode, a non-blank pending line in CSV mode; {@code false}
     *         otherwise, including before a successful initialisation
     */
    @Override
    public boolean hasNext() {
        if (TYPE_XLS.equalsIgnoreCase(fileType)) {
            return rowIterator != null && rowIterator.hasNext();
        }
        return line != null && line.trim().length() > 0;
    }

    /**
     * Builds the next patent and advances the cursor.
     *
     * <p>In CSV mode, when no line is pending, an unpopulated {@link Patent}
     * is returned. In XLS mode the underlying row iterator decides what
     * happens when it is exhausted.
     *
     * @return the patent read from the current row/line
     * @throws IllegalStateException if {@link #initWithStream(InputStream)}
     *                               has not completed successfully
     */
    @Override
    public Patent next() {
        if (fileType == null) {
            throw new IllegalStateException("initWithStream must succeed before next() is called");
        }
        Patent patent = new Patent();
        if (TYPE_XLS.equalsIgnoreCase(fileType)) {
            parseLineXLS(patent);
        } else {
            if (line != null) {
                parseLineCSV(patent);
            }
            nextLine();
        }
        return patent;
    }

    /** Removal is not supported by this importer; the call is ignored. */
    @Override
    public void remove() {
    }

    /**
     * Reads the next spreadsheet row into {@code patent}. Only string cells
     * are considered; every other cell type is skipped.
     */
    private void parseLineXLS(Patent patent) {
        patent.setLanguage(lang);

        Row row = rowIterator.next(); // one row per patent
        Iterator<Cell> cells = row.cellIterator();
        while (cells.hasNext()) {
            Cell cell = cells.next(); // one cell per attribute
            if (cell.getCellType() == Cell.CELL_TYPE_STRING) {
                fillPatentXLS(patent, cell.getColumnIndex(), cell.getStringCellValue());
            }
        }
    }

    /**
     * Stores one XLS cell in the matching patent attribute.
     *
     * @param patent       patent being populated
     * @param columnIndex  zero-based column of the cell
     * @param contentStrin text of the cell; multi-valued cells hold one value
     *                     per line
     */
    private void fillPatentXLS(Patent patent, int columnIndex, String contentStrin) {
        switch (columnIndex) {
        case 0:
            patent.setTitleSelect(contentStrin);
            break;
        case 1:
            patent.setPublicationNumber(contentStrin);
            break;
        case 2: {
            java.util.Date published = parseDate(sdf, contentStrin);
            if (published != null) {
                patent.setPublicationDate(published);
            }
            break;
        }
        case 3: {
            List<Inventor> inventors = patent.getInventors();
            StringTokenizer entries = new StringTokenizer(contentStrin, "\n");
            while (entries.hasMoreTokens()) {
                String[] parsed = splitNameAndCountry(entries.nextToken());
                if (!parsed[0].isEmpty()) {
                    Inventor inventor = new Inventor();
                    inventor.setName(parsed[0]);
                    inventor.setCountry(countryRepository.getCountryByAcronym(parsed[1]));
                    inventors.add(inventor);
                }
            }
            patent.setInventors(inventors);
            break;
        }
        case 4: {
            List<Applicant> applicants = patent.getApplicants();
            StringTokenizer entries = new StringTokenizer(contentStrin, "\n");
            while (entries.hasMoreTokens()) {
                String[] parsed = splitNameAndCountry(entries.nextToken());
                if (!parsed[0].isEmpty()) {
                    Applicant applicant = new Applicant();
                    applicant.setName(parsed[0]);
                    applicant.setCountry(countryRepository.getCountryByAcronym(parsed[1]));
                    applicants.add(applicant);
                }
            }
            patent.setApplicants(applicants);
            break;
        }
        case 5: {
            List<Classification> classifications = patent.getClassifications();
            StringTokenizer entries = new StringTokenizer(contentStrin, "\n");
            while (entries.hasMoreTokens()) {
                classifications.add(newClassification(entries.nextToken()));
            }
            applyMainClassification(patent, classifications);
            patent.setClassifications(classifications);
            break;
        }
        case 6:
            // Cooperative Patent Classification: not imported.
            break;
        case 7:
            patent.setApplicationNumber(contentStrin);
            break;
        case 8: {
            java.util.Date applied = parseDate(sdf2, contentStrin);
            if (applied != null) {
                patent.setApplicationDate(applied);
            }
            break;
        }
        case 9: {
            List<Priority> priorities = patent.getPriorities();
            StringTokenizer entries = new StringTokenizer(contentStrin, "\n");
            while (entries.hasMoreTokens()) {
                Priority priority = parsePriority(entries.nextToken());
                if (priority != null) {
                    priorities.add(priority);
                }
            }
            patent.setPriorities(priorities);
            break;
        }
        default:
            break;
        }
    }

    /**
     * Loads the next CSV line into {@link #line}; a read failure is logged and
     * treated as end of input.
     */
    private void nextLine() {
        try {
            line = br.readLine();
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, null, e);
            line = null;
        }
    }

    /**
     * Populates {@code patent} from the pending CSV line.
     *
     * <p>Fields are separated by {@code ",}. The title is taken as a whole,
     * because it is a single value that may itself contain {@code ;}. Every
     * other field is split on {@code ;}; for single-valued fields the last
     * segment wins. Empty classification and priority segments are skipped.
     */
    private void fillPatentCSV(Patent patent) {
        patent.setLanguage(lang);
        String[] fields = line.split("\",");

        List<Inventor> inventors = patent.getInventors();
        List<Applicant> applicants = patent.getApplicants();
        List<Classification> classifications = patent.getClassifications();
        List<Priority> priorities = patent.getPriorities();

        int columns = Math.min(fields.length, LAST_COLUMN + 1);
        for (int i = 0; i < columns; i++) {
            if (i == 0) {
                patent.setTitleSelect(unquote(fields[i]));
                continue;
            }
            String[] segments = fields[i].split(";");
            for (int j = 0; j < segments.length; j++) {
                String contentString = unquote(segments[j]);

                switch (i) {
                case 1:
                    patent.setPublicationNumber(contentString);
                    break;
                case 2: {
                    java.util.Date published = parseDate(sdf, contentString);
                    if (published != null) {
                        patent.setPublicationDate(published);
                    }
                    break;
                }
                case 3: {
                    String name = contentString.replaceAll("\\[.*]", "").trim();
                    if (!name.isEmpty()) {
                        Inventor inventor = new Inventor();
                        inventor.setName(name);
                        String acronym = bracketedAcronym(contentString);
                        if (acronym != null) {
                            inventor.setCountry(countryRepository.getCountryByAcronym(acronym));
                        }
                        inventors.add(inventor);
                    }
                    break;
                }
                case 4: {
                    String name = contentString.replaceAll("\\[.*]", "").trim();
                    if (!name.isEmpty()) {
                        Applicant applicant = new Applicant();
                        applicant.setName(name);
                        String acronym = bracketedAcronym(contentString);
                        if (acronym != null) {
                            applicant.setCountry(countryRepository.getCountryByAcronym(acronym));
                        }
                        applicants.add(applicant);
                    }
                    break;
                }
                case 5:
                    if (!contentString.isEmpty()) {
                        classifications.add(newClassification(contentString));
                    }
                    break;
                case 6:
                    // Cooperative Patent Classification: not imported.
                    break;
                case 7:
                    patent.setApplicationNumber(contentString);
                    break;
                case 8: {
                    java.util.Date applied = parseDate(sdf2, contentString);
                    if (applied != null) {
                        patent.setApplicationDate(applied);
                    }
                    break;
                }
                case 9: {
                    Priority priority = parsePriority(contentString);
                    if (priority != null) {
                        priorities.add(priority);
                    }
                    break;
                }
                default:
                    break;
                }
            }
        }
        patent.setApplicants(applicants);
        patent.setClassifications(classifications);
        applyMainClassification(patent, classifications);
        patent.setPriorities(priorities);
        patent.setInventors(inventors);
    }

    /** Parses the pending CSV line into {@code patent}. */
    private void parseLineCSV(Patent patent) {
        fillPatentCSV(patent);
    }

    /** Removes every double quote and the surrounding whitespace from a CSV value. */
    private static String unquote(String raw) {
        return raw.replace("\"", "").trim();
    }

    /**
     * Parses {@code text} with {@code format}.
     *
     * @return the parsed date, or {@code null} when the text is blank or does
     *         not match the format (the latter is logged)
     */
    private static java.util.Date parseDate(SimpleDateFormat format, String text) {
        if (text == null || text.trim().isEmpty()) {
            return null;
        }
        try {
            return format.parse(text);
        } catch (ParseException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return null;
        }
    }

    /**
     * Splits an XLS person entry into its name and country acronym. Words that
     * are wrapped in square brackets are taken as the acronym (the last one
     * wins); all other words, joined by single spaces, form the name.
     *
     * @return a two-element array: trimmed name, then acronym (empty when the
     *         entry has none)
     */
    private static String[] splitNameAndCountry(String entry) {
        StringBuilder name = new StringBuilder();
        String country = "";
        StringTokenizer words = new StringTokenizer(entry);
        while (words.hasMoreTokens()) {
            String word = words.nextToken();
            if (word.matches("\\[.+\\]+")) {
                country = word.replace("[", "").replace("]", "");
            } else {
                name.append(word).append(' ');
            }
        }
        return new String[] {name.toString().trim(), country};
    }

    /**
     * @return the text between the first {@code [} and the first {@code ]} of
     *         a CSV person entry, or {@code null} when there is no such pair
     */
    private static String bracketedAcronym(String entry) {
        int open = entry.indexOf('[');
        int close = entry.indexOf(']');
        if (open < 0 || close <= open) {
            return null;
        }
        return entry.substring(open + 1, close);
    }

    /** Creates a classification of type {@link ClassificationType#IC} with the given value. */
    private static Classification newClassification(String value) {
        Classification classification = new Classification();
        classification.setType(ClassificationType.IC);
        classification.setValue(value);
        return classification;
    }

    /** Marks the first classification, if there is one, as the patent's main classification. */
    private static void applyMainClassification(Patent patent, List<Classification> classifications) {
        if (classifications != null && !classifications.isEmpty()) {
            patent.setMainClassification(classifications.get(0));
        }
    }

    /**
     * Parses one priority entry: a number whose first two characters are the
     * country acronym, optionally followed by whitespace and a
     * {@code yyyyMMdd} date.
     *
     * @return the priority, or {@code null} when the entry is blank. Country
     *         and number stay unset when the number has fewer than two
     *         characters; the date stays unset when it is missing or
     *         unparsable.
     */
    private Priority parsePriority(String entry) {
        StringTokenizer parts = new StringTokenizer(entry);
        if (!parts.hasMoreTokens()) {
            return null;
        }
        Priority priority = new Priority();
        String number = parts.nextToken();
        if (number.length() >= 2) {
            priority.setCountry(countryRepository.getCountryByAcronym(number.substring(0, 2)));
            priority.setValue(number.substring(2));
        }
        if (parts.hasMoreTokens()) {
            java.util.Date date = parseDate(sdf2, parts.nextToken());
            if (date != null) {
                priority.setDate(date);
            }
        }
        return priority;
    }

    /** @return the identifier of this data source, {@code "ESPACENET"} */
    @Override
    public String provider() {
        return "ESPACENET";
    }
}