Example usage for org.apache.poi.hwpf HWPFDocument HWPFDocument

List of usage examples for org.apache.poi.hwpf HWPFDocument HWPFDocument

Introduction

On this page you can find example usages of the constructors of org.apache.poi.hwpf.HWPFDocument.

Prototype

public HWPFDocument(DirectoryNode directory) throws IOException 

Source Link

Document

This constructor loads a Word document from a specific directory node of a POIFSFileSystem, which is not necessarily the root directory (for example, when the document is embedded in another file).

Usage

From source file:poi.xssf.usermodel.examples.EmbeddedObjects.java

License:Apache License

/**
 * Opens the OOXML workbook named by {@code args[0]} and, for every embedded package part,
 * instantiates the POI document class that matches the part's content type.
 *
 * <p>The created documents are not used further; unknown content types are reported on
 * standard output.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the workbook to open
 * @throws Exception if the package cannot be opened or an embedded part cannot be parsed
 */
public static void main(String[] args) throws Exception {
    OPCPackage pkg = OPCPackage.open(args[0]);
    try {
        XSSFWorkbook workbook = new XSSFWorkbook(pkg);
        for (PackagePart pPart : workbook.getAllEmbedds()) {
            String contentType = pPart.getContentType();
            // Excel Workbook - either binary or OpenXML
            if (contentType.equals("application/vnd.ms-excel")) {
                HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream());
            }
            // Excel Workbook - OpenXML file format
            else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {
                XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(pPart.getInputStream());
            }
            // Word Document - binary (OLE2CDF) file format
            else if (contentType.equals("application/msword")) {
                HWPFDocument document = new HWPFDocument(pPart.getInputStream());
            }
            // Word Document - OpenXML file format
            else if (contentType
                    .equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {
                XWPFDocument document = new XWPFDocument(pPart.getInputStream());
            }
            // PowerPoint Document - binary file format
            else if (contentType.equals("application/vnd.ms-powerpoint")) {
                HSLFSlideShow slideShow = new HSLFSlideShow(pPart.getInputStream());
            }
            // PowerPoint Document - OpenXML file format
            else if (contentType
                    .equals("application/vnd.openxmlformats-officedocument.presentationml.presentation")) {
                OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
                XSLFSlideShow slideShow = new XSLFSlideShow(docPackage);
            }
            // Any other type of embedded object.
            else {
                System.out.println("Unknown Embedded Document: " + contentType);
                InputStream inputStream = pPart.getInputStream();
            }
        }
    } finally {
        // Close the package even when one of the embedded parts fails to parse;
        // previously an exception in the loop skipped the close() call.
        pkg.close();
    }
}

From source file:projekt.servise.impl.ReadDataFromWordServiceImpl.java

/**
 * Imports group names and students from a Word (.doc) roster into the {@code project} schema.
 *
 * <p>The extracted document text is split into lines and processed in two passes, both
 * starting at line index 8:
 * <ol>
 *   <li>each line starting with {@code "Jrk"} yields a group code (characters 17-20) and a
 *       group name taken from the non-blank lines directly above it; the name is written to
 *       every group whose name is still null and whose code starts with that group code;</li>
 *   <li>each remaining line that splits into 4 or 5 tokens is treated as a student row and
 *       inserted as person/student (creating the group if needed) unless it already exists.</li>
 * </ol>
 *
 * <p>I/O and SQL failures are logged and not rethrown. The file stream and the connection are
 * always closed.
 */
@Override
public void getData() {
    // NOTE(review): the document path and the database URL/credentials are hard-coded here;
    // they should be moved to configuration.
    String filePath = "C:/Users/Lenovo/Documents/NetBeansProjects/SoftwareArchitectureProject-master/src/main/java/projekt/nimekiri_test.doc";
    try (FileInputStream fis = new FileInputStream(new File(filePath));
            Connection conn = DriverManager.getConnection(
                    "jdbc:postgresql://dev.vk.edu.ee:5432/GroupWork?currentSchema=project", "t131566",
                    "t131566")) {
        HWPFDocument doc = new HWPFDocument(fis);
        WordExtractor extractor = new WordExtractor(doc);

        String text = extractor.getText();
        String strippedText = extractor.stripFields(text).replace("\r\n\r\n", "\n").replace("\t", " ")
                .replace("\r\n", "\n");
        String[] paragraphs = strippedText.split("\n");
        List<String> groupNames = groupService.getGroupNames();

        // Pass 1: fill in missing group names.
        for (int i = 8; i < paragraphs.length; i++) {
            if (paragraphs[i].startsWith("Jrk")) {
                // assumes "Jrk" header lines are at least 21 characters long — TODO confirm
                updateGroupNames(conn, paragraphs[i].substring(17, 21), findGroupName(paragraphs, i));
            }
        }

        // Pass 2: import the student rows; "code" is the group code of the latest "Jrk" header.
        String code = "";
        for (int i = 8; i < paragraphs.length; i++) {
            String line = paragraphs[i].replace("*", "").replace("OK", "").replace("TREV", "").replace("REV",
                    "");
            if (paragraphs[i].startsWith("Jrk")) {
                int colon = line.indexOf(":");
                code = line.substring(colon + 1, colon + 11).replace(" - ", "").replace("  ", "");
            }
            if (line.trim().isEmpty() || line.contains("KOOD") || line.contains("KAUGPE")
                    || line.contains("lipilane") || groupNames.contains(line) || line.contains("Jrk")
                    || isAllUpperCase(line)) {
                continue;
            }

            List<String> tokens = new ArrayList<String>();
            for (String item : line.split(" ")) {
                if (!item.isEmpty()) {
                    tokens.add(item);
                }
            }
            // Only 4-token (single last name) and 5-token (double last name) rows are understood.
            // Longer rows used to run a query with an unbound parameter and abort the import.
            if (tokens.size() == 4 || tokens.size() == 5) {
                code = importStudent(conn, tokens, code);
            }
        }

        // commit() is only legal when auto-commit is off; in auto-commit mode every statement
        // has already been committed.
        if (!conn.getAutoCommit()) {
            conn.commit();
        }
    } catch (IOException | SQLException e) {
        Logger.getLogger(ReadDataFromWordServiceImpl.class.getName()).log(Level.SEVERE, null, e);
    }
}

/**
 * Scans upwards from a "Jrk" header line until a blank line (or the start of the text) and
 * returns the last visited line whose 4th character is upper case, with "(KAUGPE)" removed.
 * Returns an empty string if no such line is found.
 */
private static String findGroupName(String[] paragraphs, int headerIndex) {
    String groupName = "";
    int j = 1;
    do {
        String candidate = paragraphs[headerIndex - j];
        if (candidate.length() > 3 && Character.isUpperCase(candidate.charAt(3))) {
            groupName = candidate;
            if (groupName.contains("(KAUGPE)")) {
                groupName = groupName.replace("(KAUGPE)", "").trim();
            }
        }
        j++;
    } while (headerIndex - j >= 0 && !paragraphs[headerIndex - j].trim().isEmpty());
    return groupName;
}

/** Sets the name of every still-unnamed group whose code starts with {@code groupCode}. */
private static void updateGroupNames(Connection conn, String groupCode, String groupName)
        throws SQLException {
    try (PreparedStatement selectGroups = conn.prepareStatement(
            "SELECT id FROM project.group where name is null and groupcode like ?");
            PreparedStatement updateName = conn
                    .prepareStatement("UPDATE project.group SET name=? where id=?")) {
        selectGroups.setString(1, groupCode + "%");
        try (ResultSet groups = selectGroups.executeQuery()) {
            while (groups.next()) {
                updateName.setString(1, groupName.replace("  ", " "));
                updateName.setInt(2, groups.getInt(1));
                updateName.executeUpdate();
            }
        }
    }
}

/**
 * Inserts the person and student for one roster row unless the student code or the person
 * name is already known; creates the group when {@code groupService} does not know it.
 *
 * @return the group code to keep using (it is stripped of spaces once a new group was created)
 */
private String importStudent(Connection conn, List<String> tokens, String code) throws SQLException {
    boolean doubleLastname = tokens.size() == 5;
    String firstname = tokens.get(0);
    String lastname = doubleLastname ? tokens.get(1) + " " + tokens.get(2) : tokens.get(1);
    String studentCode = tokens.get(doubleLastname ? 3 : 2);

    if (rowExists(conn, "SELECT * FROM project.student where code=?", studentCode)
            || rowExists(conn, "SELECT * FROM project.person where firstname=? and lastname=?", firstname,
                    lastname)) {
        return code;
    }

    runUpdate(conn, "INSERT INTO project.person (firstname,lastname,roleid) VALUES (?,?,?)", firstname,
            lastname, 2);
    Integer personId = selectFirstInt(conn,
            "SELECT id FROM project.person where firstname=? and lastname=? and roleid=?", firstname,
            lastname, 2);
    if (personId == null) {
        return code;
    }

    System.out.println("GROUP CODE " + code);
    Group1 group = groupService.getByGroupcode(code);
    if (group != null) {
        System.out.println("GROUP ID " + group.getId());
        // NOTE(review): the student's "code" column receives the group code, not the
        // student code looked up above — kept as in the original, confirm intent.
        runUpdate(conn, "INSERT INTO project.student (personid,groupid,code) values(?,?,?)", personId,
                group.getId(), code);
        System.out.println("NEW STUDENT " + personId);
        return code;
    }

    System.out.println("GROUP WAS NULL ");
    runUpdate(conn, "INSERT INTO project.group (groupcode) VALUES (?)", code);
    System.out.println("NEW GROUP " + code);
    // NOTE(review): the group is inserted with the unstripped code but looked up with the
    // stripped one — kept as in the original, confirm intent.
    code = code.replace(" ", "");
    System.out.println("SELECT id FROM project.group where groupcode=" + code);
    Integer groupId = selectFirstInt(conn, "SELECT id FROM project.group where groupcode=?", code);
    if (groupId != null && !rowExists(conn, "SELECT * FROM project.student where personid=?", personId)) {
        runUpdate(conn, "INSERT INTO project.student (personid,groupid,code) values(?,?,?)", personId,
                groupId, code);
        System.out.println("NEW STUDENT " + personId);
    }
    return code;
}

/** Binds {@code params} to the statement's placeholders in order (1-based). */
private static void bindParameters(PreparedStatement statement, Object... params) throws SQLException {
    for (int k = 0; k < params.length; k++) {
        Object value = params[k];
        if (value instanceof Integer) {
            statement.setInt(k + 1, (Integer) value);
        } else if (value instanceof String) {
            statement.setString(k + 1, (String) value);
        } else {
            statement.setObject(k + 1, value);
        }
    }
}

/** Returns whether the query yields at least one row. */
private static boolean rowExists(Connection conn, String sql, Object... params) throws SQLException {
    try (PreparedStatement statement = conn.prepareStatement(sql)) {
        bindParameters(statement, params);
        try (ResultSet rows = statement.executeQuery()) {
            return rows.next();
        }
    }
}

/** Returns the first column of the first row as an int, or {@code null} when there is no row. */
private static Integer selectFirstInt(Connection conn, String sql, Object... params) throws SQLException {
    try (PreparedStatement statement = conn.prepareStatement(sql)) {
        bindParameters(statement, params);
        try (ResultSet rows = statement.executeQuery()) {
            return rows.next() ? Integer.valueOf(rows.getInt(1)) : null;
        }
    }
}

/** Executes an INSERT/UPDATE statement with the given parameters and closes it. */
private static void runUpdate(Connection conn, String sql, Object... params) throws SQLException {
    try (PreparedStatement statement = conn.prepareStatement(sql)) {
        bindParameters(statement, params);
        statement.executeUpdate();
    }
}

From source file:ro.dabuno.office.integration.Word2Forrest.java

License:Apache License

/**
 * Converts the Word document named by {@code args[0]} and writes the result to
 * {@code test.xml} in the working directory.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the .doc file
 * @throws IOException if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws IOException {
    System.out.println(args[0]);
    // try-with-resources closes "out" first and then "is", as before, but also closes "is"
    // when opening the output file fails or when closing "out" throws.
    try (InputStream is = new FileInputStream(args[0]);
            OutputStream out = new FileOutputStream("test.xml")) {
        new Word2Forrest(new HWPFDocument(is), out);
    }
}

From source file:rocky.sizecounter.SizeCounterUtil.java

License:Apache License

/**
 * Count Word's number of page from input directory.
 * /*from w  w  w .  j  av a 2s .c  om*/
 * @param filePath .
 * @return Number of A4 pages
 */
public static int countWordFile(String filePath) {
    FileInputStream fis = null;
    int page = 0;
    try {
        fis = new FileInputStream(filePath);

        if (CommonUtil.getExtension(filePath).equals("doc")) { // When file is .DOC
            HWPFDocument doc = new HWPFDocument(fis);
            page = doc.getDocProperties().getCPg();
        } else if (CommonUtil.getExtension(filePath).equals("docx")) { // When file is .DOCX
            XWPFDocument doc = new XWPFDocument(fis);
            XWPFWordExtractor ex = new XWPFWordExtractor(doc);
            page = ex.getExtendedProperties().getUnderlyingProperties().getPages();
        }
    } catch (FileNotFoundException ex) {
        LOG.warn("File " + filePath + " not found", ex);
    } catch (IOException ex) {
        LOG.warn("Invalid when reading file.", ex);
    } catch (Exception ex) {
        LOG.warn("Can not count file " + filePath, ex);
    } finally {
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException ex) {
                LOG.warn("Close the file input stream", ex);
            }
        }
    }
    return page;
}

From source file:rzd.vivc.astzpte.beans.pagebean.ReportBean.java

/**
 * Fills the Word template {@code c:\rep\templ.doc} with the data of the user's first ticket
 * and writes the result to {@code c:\rep\<ticketId>.doc}.
 *
 * <p>I/O failures are logged; the file name is returned even when writing failed.
 *
 * @param usr user whose first ticket is reported
 * @return the name of the report file ({@code <ticketId>.doc})
 */
public String generateReport(User usr) {
    Ticket ticket = usr.getTickets().get(0);
    List<UserAnswer> answers = ticket.getAnswers();
    ArrayList<UserAnswerModel> questions = new ArrayList<>();
    for (int i = 0; i < answers.size(); i++) {
        if (answers.get(i).getAnswer() != null) {
            questions.add(new UserAnswerModel(answers.get(i), i));
        }
    }
    SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy");
    // NOTE(review): "hh" is the 12-hour clock without an AM/PM marker — confirm "HH" was not intended.
    SimpleDateFormat format1 = new SimpleDateFormat("hh:mm");
    try (FileInputStream fis = new FileInputStream("c:\\rep\\templ.doc")) {
        HWPFDocument doc = new HWPFDocument(fis);
        doc.getRange().getParagraph(3).replaceText("(dtBeg)", format.format(ticket.getDt_create()));
        doc.getRange().getParagraph(9).replaceText("(timeBeg)", format1.format(ticket.getDt_create()));
        doc.getRange().getParagraph(11).replaceText("(timeFin)", format1.format(ticket.getFinish()));
        long num = usr.getNum();
        doc.getRange().replaceText("(num)", num + "");

        doc.getRange().getParagraph(24).replaceText("(allow1)",
                usr.getAllowNum() + "  " + format.format(usr.getAllowDat()));

        doc.getRange().replaceText("(tickNum)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getNum() + "");
        doc.getRange().replaceText("(themeNum)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getTheme().getId() + "");
        doc.getRange().replaceText("(themeName)",
                ticket.getAnswers().get(0).getQuestion().getTicketTemplate().getTheme().getName());

        int count = 0;
        // NOTE(review): assumes at least 50 answered questions; fewer make questions.get throw
        // IndexOutOfBoundsException — confirm tickets always carry 50 answers.
        for (int i = 1; i <= 50; i++) {
            UserAnswerModel answerModel = questions.get(i - 1);
            // Placeholders are numbered with two digits: T01..T09, T10..T50 (same for C, Y, B).
            String suffix = (i < 10 ? "0" : "") + i;
            doc.getRange().replaceText("T" + suffix, answerModel.getQuestion().getText());
            doc.getRange().replaceText("C" + suffix, answerModel.givenNumber() + "");
            boolean cor = answerModel.correctNumber() == answerModel.givenNumber();
            if (cor) {
                count++;
            }
            doc.getRange().replaceText("Y" + suffix,
                    cor ? " " : "  ");
            doc.getRange().replaceText("B" + suffix, cor ? "1" : "0");
        }
        // The original replaces "BT" twice; kept in case the template relies on it — TODO confirm.
        doc.getRange().replaceText("BT", count + "");
        doc.getRange().replaceText("BT", count + "");

        // try-with-resources closes the output file even when doc.write fails.
        try (FileOutputStream fos = new FileOutputStream("c:\\rep\\" + ticket.getId() + ".doc")) {
            doc.write(fos);
        }
    } catch (IOException ex) {
        // FileNotFoundException is an IOException and was logged identically.
        Logger.getLogger(ReportBean.class.getName()).log(Level.SEVERE, null, ex);
    }
    return ticket.getId() + ".doc";
}

From source file:sesiondataextractor.SesionDataExtractor.java

/**
 * Reads the paragraphs of {@code testdata/S 30.doc} and splits them into speaker name and
 * speech text.
 *
 * <p>A paragraph containing ':' is treated as the start of a new participation: the previous
 * speaker name is printed, session 1 is looked up, and the text before the colon is matched
 * against the president / senator prefixes. Paragraphs without ':' are appended to the
 * current speech. Any exception is printed to standard error and ends processing.
 *
 * @param senadores not used by this method
 */
private static void getParticipaciones(List senadores) {
    File file = new File("testdata/S 30.doc");
    // try-with-resources: the stream was previously never closed.
    try (FileInputStream fis = new FileInputStream(file.getAbsolutePath())) {
        HWPFDocument document = new HWPFDocument(fis);
        WordExtractor extractor = new WordExtractor(document);
        String[] paragraphList = extractor.getParagraphText();
        String senadorName = "";
        String senadorSpeech = "";
        for (String paragraph : paragraphList) {
            if (paragraph == null) {
                continue;
            }
            int colon = paragraph.indexOf(":");
            if (colon == -1) {
                // it is the continuation of a speech
                senadorSpeech = senadorSpeech + paragraph;
                continue;
            }

            // looks like the start of a new participation:
            // first persist the previous participation
            System.out.println("SENADOR:" + senadorName);
            TypedQuery<Sesiones> q = em.createNamedQuery("Sesiones.findBySesionId", Sesiones.class);
            q.setParameter("sesionId", 1);
            // TODO: "s" and "p" are not used yet; persisting the participation is unfinished.
            Sesiones s = q.getSingleResult();
            Participaciones p = new Participaciones();

            // now process the new paragraph
            String speakerName = paragraph.substring(0, colon);
            senadorName = "";
            if (speakerName.length() < SENADORA_PREFIX.length()) {
                // too short to be a speaker label; the name stays empty
            } else if (speakerName.equals(PRESIDENT_STRING)) {
                senadorName = PRESIDENT_STRING;
            } else if (speakerName.startsWith(SENADOR_PREFIX)) {
                // strip the longer prefix when it applies
                int prefixLength = speakerName.startsWith(SENADORA_PREFIX) ? SENADORA_PREFIX.length()
                        : SENADOR_PREFIX.length();
                senadorName = speakerName.substring(prefixLength);
            }
            // the speech starts at the colon, whatever the label was
            senadorSpeech = paragraph.substring(speakerName.length());
        }
    } catch (Exception exep) {
        exep.printStackTrace();
    }
}

From source file:textextractor.WordManager.java

/**
 * Reads a Word (.doc) document and collects the space-separated words of all its paragraphs
 * into {@code listDoc}.
 *
 * <p>Each paragraph's text is printed to standard output, preceded by its style name when
 * the style is not "Normal".
 *
 * @param fis stream positioned at the start of the .doc file; it is not closed here
 * @return the words of every paragraph in document order (empty if there are no paragraphs)
 * @throws IOException if the document cannot be read
 */
public ArrayList extractDoc(FileInputStream fis) throws IOException {
    HWPFDocument doc = new HWPFDocument(fis);
    Range range = doc.getRange();
    // Create the list once: it used to be re-created for every paragraph, so only the words
    // of the last paragraph were returned.
    listDoc = new ArrayList();
    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph p = range.getParagraph(i);
        StyleDescription style = doc.getStyleSheet().getStyleDescription(p.getStyleIndex());
        if (style != null && !"Normal".equals(style.getName())) {
            System.out.println(style.getName());
        }
        String paragraphText = p.text();
        System.out.println(paragraphText);
        listDoc.addAll(Arrays.asList(paragraphText.split(" ")));
    }
    return listDoc;
}

From source file:ua.kiev.univ.linguistics.MSWordDocumentExtractor.java

License:Open Source License

/**
 * Splits the text of a Word (.doc) file into up to three blocks keyed by {@code HEADER},
 * {@code TITLE} and {@code OTHER}.
 *
 * <p>Paragraphs consisting only of whitespace are skipped. Each remaining paragraph is
 * classified with {@code getType}: while still in the initial stage, type 1 goes to the
 * header, type 0 to the title and type -1 to the remaining text; after the first type 0 or -1
 * paragraph every following paragraph goes to the remaining text. Empty blocks are omitted.
 *
 * @param filename path of the .doc file
 * @return the non-empty text blocks, each passed through {@code trim}
 * @throws IOException if the file cannot be read
 */
public static Map<String, String> extract(String filename) throws IOException {
    HWPFDocument doc;
    // The stream was previously never closed.
    try (FileInputStream in = new FileInputStream(filename)) {
        doc = new HWPFDocument(in);
    }
    WordExtractor extractor = new WordExtractor(doc);
    String[] texts = extractor.getParagraphText();
    List<PAPX> pars = doc.getParagraphTable().getParagraphs();
    ParagraphProperties[] paragraphs = new ParagraphProperties[pars.size()];
    for (int i = 0; i < paragraphs.length; i++) {
        paragraphs[i] = pars.get(i).getParagraphProperties(doc.getStyleSheet());
    }

    // par[0] = header, par[1] = title, par[2] = everything else
    StringBuilder[] par = new StringBuilder[3];
    for (int i = 0; i < par.length; i++) {
        par[i] = new StringBuilder();
    }

    int stage = 0;
    for (int i = 0; i < Math.min(paragraphs.length, texts.length); i++) {
        String text = texts[i];
        if (text.matches("[\\s]*")) {
            continue;
        }
        int type = getType(paragraphs[i], text);
        switch (stage) {
        case 0:
            if (type == 1) {
                par[0].append(text).append("\n");
            } else if (type == 0) {
                par[1].append(text).append("\n");
                stage = 2;
            } else if (type == -1) {
                par[2].append(text).append("\n");
                stage = 2;
            }
            break;
        case 2:
            par[2].append(text).append("\n");
            break;
        default:
            break;
        }
    }

    Map<String, String> textBlocks = new TreeMap<String, String>();
    if (par[0].length() > 0) {
        textBlocks.put(HEADER, trim(par[0].toString()));
    }
    if (par[1].length() > 0) {
        textBlocks.put(TITLE, trim(par[1].toString()));
    }
    if (par[2].length() > 0) {
        textBlocks.put(OTHER, trim(par[2].toString()));
    }
    return textBlocks;
}

From source file:uk.ac.liverpool.MSOffice.MSWord.java

License:Open Source License

/**
 * Reports whether the input looks like a binary Word document.
 *
 * <p>When {@code Confidence.SUFFIX} lies in the requested range, only the path suffix is
 * checked. Otherwise, when {@code Confidence.PARSE} lies in the range, the input is parsed
 * with POI and a parse failure is printed to standard error.
 *
 * @param min lower confidence bound
 * @param max upper confidence bound
 * @param path path of the input, may be null
 * @param complete not used by this implementation
 * @return the set returned by {@code validateParams}, with any matching info added
 */
@Override
public SortedSet<IDInfo> getTypeInfo(Confidence min, Confidence max, String path, boolean complete)
        throws IOException {

    SortedSet<IDInfo> result = validateParams(min, max);
    System.out.println(path);

    if (inRange(min, Confidence.SUFFIX, max)) {
        boolean hasDocSuffix = path != null && path.toLowerCase().endsWith("doc");
        if (hasDocSuffix) {
            result.add(new IDInfo(Confidence.SUFFIX, this, MIME));
        }
        return result;
    }

    if (!inRange(min, Confidence.PARSE, max)) {
        return result;
    }

    InputUni source = getInputUni();
    try {
        // Constructing the document is the check: it throws when the data is not a Word file.
        POIFSFileSystem fileSystem = new POIFSFileSystem(source.getInputStream());
        new HWPFDocument(fileSystem);
        result.add(new IDInfo(Confidence.PARSE, this, MIME));
    } catch (Exception parseFailure) {
        parseFailure.printStackTrace();
    }
    return result;
}

From source file:uk.ac.liverpool.MSOffice.MSWord.java

License:Open Source License

/**
 * Parses the Word input into the document tree below {@code parent}.
 *
 * <p>The parsed {@code HWPFDocument} is stored on the document under the key
 * {@code "worddoc"} the first time it is needed; the result is produced by converting to
 * HTML and delegating to {@code parseHelper}.
 *
 * @param parent node under which the parsed content is placed
 * @return whatever {@code parseHelper} returns for the generated HTML
 * @throws Exception if reading or converting the input fails
 */
@Override
public Object parse(INode parent) throws Exception {

    Document doc = parent.getDocument();
    InputUni uni = getInputUni();

    HWPFDocument cachedWord = (HWPFDocument) doc.getValue("worddoc");
    if (cachedWord == null) {
        // Not parsed yet: read the raw stream and remember the result on the document.
        System.out.println("new word extractor");
        cachedWord = new HWPFDocument(new POIFSFileSystem(uni.getInputStreamRaw()));
        doc.putAttr("worddoc", cachedWord);
    }

    String html = toHTML(parent);
    return parseHelper(html, "HTML", getLayer(), parent);
}