Example usage for org.apache.poi.hwpf HWPFDocument HWPFDocument

List of usage examples for the org.apache.poi.hwpf.HWPFDocument constructor

Introduction

On this page you can find usage examples for the org.apache.poi.hwpf.HWPFDocument constructor.

Prototype

public HWPFDocument(DirectoryNode directory) throws IOException 

Source Link

Document

This constructor loads a Word document from a specific point in a POIFSFileSystem, probably not the default.

Usage

From source file:com.artech.prototype2.bardakov.utils.impl.MultiParserImpl.java

/**
 * Extracts the text of a Word document (.doc or .docx) and splits it into words.
 *
 * <p>The format is chosen from the last character of the path: a trailing {@code x} selects
 * the .docx reader, anything else is read as a binary .doc file. I/O failures are reported
 * with {@code printStackTrace()} and are not rethrown.
 *
 * @param FilePath path of the Word file to read
 * @return the extracted text split on single spaces; an empty list if the file could not
 *         be read
 */
private ArrayList<String> getListOfWordsFromDoc(String FilePath) {
    ArrayList<String> result = new ArrayList<String>();
    try {
        FileInputStream fis = new FileInputStream(new File(FilePath));
        try {
            String text;
            // endsWith() is safe for an empty path, unlike substring(length() - 1).
            if (FilePath.endsWith("x")) { // is a docx
                text = new XWPFWordExtractor(new XWPFDocument(fis)).getText();
            } else { // is not a docx
                text = new WordExtractor(new HWPFDocument(fis)).getText();
            }
            for (String word : text.split(" ")) {
                result.add(word);
            }
        } finally {
            // Release the file handle explicitly, including when parsing fails.
            fis.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return result;
}

From source file:com.duroty.lucene.parser.MSWordParser.java

License:Open Source License

/**
 * DOCUMENT ME!/*from  ww  w .j  a  v  a 2 s .  c om*/
 *
 * @return DOCUMENT ME!
 *
 * @throws ParserException DOCUMENT ME!
 */
private String getContents() throws ParserException {
    String contents = "";

    try {
        HWPFDocument doc = new HWPFDocument(input);
        Range r = doc.getRange();
        StringBuffer buffer = new StringBuffer();

        for (int x = 0; x < r.numSections(); x++) {
            Section s = r.getSection(x);

            for (int y = 0; y < s.numParagraphs(); y++) {
                Paragraph p = null;

                try {
                    p = s.getParagraph(y);
                } catch (Exception e) {
                    buffer.append("\n");
                }

                if (p != null) {
                    for (int z = 0; z < p.numCharacterRuns(); z++) {
                        try {
                            //character run
                            CharacterRun run = p.getCharacterRun(z);

                            //character run text
                            buffer.append(run.text());
                        } catch (Exception e) {
                            buffer.append(" ");
                        }
                    }
                }

                /*if (sleep > 0) {
                    try {
                        Thread.sleep(sleep);
                    } catch (Exception ex) {
                    }
                }*/
                // use a new line at the paragraph break
                buffer.append("\n");
            }
        }

        contents = buffer.toString();
    } catch (Exception ex) {
        throw new ParserException(ex);
    }

    return contents;
}

From source file:com.example.minireader.WordViewActivity.java

License:Apache License

/**
 * Loads the Word document named by {@code nameStr} into {@code hwpf} and derives
 * {@code range}, {@code pictures} and {@code tableIterator} from it.
 *
 * <p>A load failure is reported with {@code printStackTrace()} instead of being swallowed.
 * If no document is available afterwards the derived fields are left untouched, which
 * avoids the {@code NullPointerException} the unconditional dereference would cause.
 */
private void getRange() {
    FileInputStream in = null;
    try {
        in = new FileInputStream(nameStr);
        hwpf = new HWPFDocument(new POIFSFileSystem(in));
    } catch (Exception e) {
        // Report the failure; hwpf keeps whatever value it had before this call.
        e.printStackTrace();
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // Nothing useful can be done if closing the input fails.
            }
        }
    }

    if (hwpf == null) {
        return;
    }

    range = hwpf.getRange();
    pictures = hwpf.getPicturesTable().getAllPictures();
    tableIterator = new TableIterator(range);
}

From source file:com.google.gdt.handler.impl.WordHandler.java

License:Open Source License

/**
 * /*ww  w  .  j a va2 s.c om*/
 * @param inputFile
 * @param pLevel
 * @throws IOException
 * @throws InvalidFormatException
 */
@Override
public void handle(String inputFile, ProgressLevel pLevel) throws IOException, InvalidFormatException {
    String outPutFile = getOuputFileName(inputFile);
    OutputStream outputStream = new FileOutputStream(outPutFile);
    InputStream inputStream = new FileInputStream(inputFile);

    HWPFDocument hDocument = new HWPFDocument(inputStream);
    Range range = hDocument.getRange();

    pLevel.setTrFileName(outPutFile);
    pLevel.setValue(0);
    pLevel.setStringPainted(true);
    pLevel.setMaxValue(range.numParagraphs());
    int count = 0;
    for (int i = 0; i < range.numParagraphs(); i++) {
        Paragraph paragraph = range.getParagraph(i);
        int numCharRuns = paragraph.numCharacterRuns();
        for (int j = 0; j < numCharRuns; j++) {
            if (isInterrupted) {
                outputStream.close();
                new File(outPutFile).delete();
                pLevel.setString("cancelled");
                return;
            }
            CharacterRun charRun = paragraph.getCharacterRun(j);
            String inputText = charRun.text();
            if ((null == inputText) || (inputText.trim().equals("")))
                continue;
            String translatedTxt = inputText;
            //in http post method, all key value pairs are seperated with &
            if (preferenceModel.getTranslatorType() == TranslatorType.HTTP)
                inputText = inputText.replaceAll("&", "and");
            try {
                translatedTxt = translator.translate(translatedTxt);
                charRun.replaceText(inputText, translatedTxt);
            } catch (Exception e) {
                logger.log(Level.SEVERE,
                        "Input File : " + inputFile + " cannot translate the text : " + inputText, e);
            }
        }
        count++;
        pLevel.setValue(count);
    }
    pLevel.setString("done");
    hDocument.write(outputStream);
    outputStream.close();
}

From source file:com.icebreak.p2p.front.controller.trade.download.WordParse.java

/**
 * Fills a Word (.doc) template, converts the result to PDF and hands the PDF to the
 * download/preview helper.
 *
 * <p>Steps: load the template found under the web application's real path; apply the
 * per-row replacements of {@code lst} to rows 1..n of the first table and delete the rows
 * after them; apply the {@code map} replacements to the whole document; write the document
 * to a temporary file; convert it with {@code Doc2Pdf}; run the download helper; delete
 * the PDF.
 *
 * @param response servlet response passed to the download helper
 * @param session session used to resolve the web application's real path
 * @param _file template path relative to the web application root; its extension is reused
 *        for the temporary file name
 * @param map placeholder-to-value replacements applied to the whole document
 * @param lst per-row replacements for the first table; entry {@code i - 1} is applied to
 *        table row {@code i}
 * @param loan loan whose name is passed to the download helper
 * @param downType download type passed to the download helper
 * @throws IllegalStateException if the template cannot be loaded, or if {@code lst} is not
 *         empty but the template contains no table
 */
@Transactional(rollbackFor = Exception.class, value = "transactionManager")
public void readwriteWord(HttpServletResponse response, HttpSession session, String _file,
        Map<String, String> map, List<Map<String, Text>> lst, LoanDemandDO loan, String downType) {
    String filePath = _file;
    ServletContext application = session.getServletContext();
    String serverRealPath = application.getRealPath("/");
    String fileTemp = AppConstantsUtil.getYrdUploadFolder() + File.separator + "doc";
    File fileDir = new File(fileTemp);
    if (!fileDir.exists()) {
        fileDir.mkdir();
    }

    // Load the Word template. A failure is logged and then raised with its cause, instead
    // of surfacing later as a NullPointerException on the unloaded document.
    HWPFDocument hdt;
    FileInputStream in = null;
    try {
        in = new FileInputStream(new File(serverRealPath + filePath));
        hdt = new HWPFDocument(in);
    } catch (Exception e1) {
        logger.error("??", e1);
        throw new IllegalStateException("Cannot load Word template: " + filePath, e1);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // Nothing useful can be done if closing the template input fails.
            }
        }
    }

    // Only the first table of the template is filled.
    Range range = hdt.getRange();
    TableIterator it = new TableIterator(range);
    Table tb = it.hasNext() ? it.next() : null;

    if (lst.size() > 0) {
        if (tb == null) {
            throw new IllegalStateException("Word template contains no table to fill: " + filePath);
        }
        // Row 0 is skipped; list entry i - 1 fills table row i.
        for (int i = 1; i <= lst.size(); i++) {
            Map<String, Text> replaces = lst.get(i - 1);
            TableRow tr = tb.getRow(i);
            for (int j = 0; j < tr.numCells(); j++) {
                TableCell td = tr.getCell(j);
                for (int k = 0; k < td.numParagraphs(); k++) {
                    Paragraph para = td.getParagraph(k);
                    final String old = para.text();
                    String s = old;
                    for (Map.Entry<String, Text> replacement : replaces.entrySet()) {
                        if (s.contains(replacement.getKey())) {
                            s = s.replace(replacement.getKey(), replacement.getValue().getText());
                        }
                    }
                    // Touch the paragraph only when at least one placeholder matched.
                    if (!old.equals(s)) {
                        para.replaceText(old, s);
                    }
                }
            }
        }
        // Remove the table rows that received no data.
        for (int n = lst.size() + 1; n < tb.numRows(); n++) {
            TableRow tr = tb.getRow(n);
            tr.delete();
        }
    }

    for (Map.Entry<String, String> entry : map.entrySet()) {
        range.replaceText(entry.getKey(), entry.getValue());
    }

    // NOTE(review): there is no File.separator between fileTemp and fileName, so the file is
    // created next to the "doc" directory with a "doc" name prefix rather than inside it.
    // Confirm whether that is intended.
    String fileName = System.currentTimeMillis() + _file.substring(_file.lastIndexOf("."), _file.length());
    ByteArrayOutputStream ostream = new ByteArrayOutputStream();
    try {
        FileOutputStream out = new FileOutputStream(fileTemp + fileName);
        try {
            hdt.write(ostream);
            out.write(ostream.toByteArray());
            out.flush();
        } finally {
            // Release the handle even when serializing or writing fails.
            out.close();
        }
    } catch (Exception e) {
        logger.error("?word", e);
    }

    Doc2Pdf doc2pdf = new Doc2Pdf();
    String pdfAddress = doc2pdf.createPDF(fileTemp + fileName);
    try {
        // Row data present selects "contract"; otherwise "letter".
        String fileType = lst.size() > 0 ? "contract" : "letter";
        DownloadAndPrivewFileTread downThread = new DownloadAndPrivewFileTread();
        downThread.setDownType(downType);
        downThread.setFilePath(pdfAddress);
        downThread.setResponse(response);
        downThread.setFileType(fileType);
        downThread.setProName(loan.getLoanName());
        // run() is invoked directly, so the helper executes on the calling thread and has
        // finished before the PDF is deleted below.
        downThread.run();
        File pdfFile = new File(pdfAddress);
        pdfFile.delete();
    } catch (Exception e) {
        logger.error("pdf", e);
    }
}

From source file:com.isotrol.impe3.idx.oc.extractors.ExtractorMsWord.java

License:Open Source License

/**
 * Extracts the text of a Word file.
 *
 * @param in input stream providing the Word document
 * @return the raw text after {@code ExtractorUtil.removeControlChars} has been applied
 * @throws Exception if the document cannot be read
 */
public static String extractText(InputStream in) throws Exception {
    final WordExtractor extractor = new WordExtractor(new HWPFDocument(in));

    // Drop the characters that are of no use for indexing.
    return ExtractorUtil.removeControlChars(extractor.getText());
}

From source file:com.jgaap.generics.DocumentHelper.java

License:Open Source License

/**
 * Extracts text from a Word document and stores it in the document.
 * /*  w  w w .j ava2 s .  c  o m*/
 * @param inputStream
 *            An input stream pointing to the Word document to be read.
 * @throws IOException
 */
static private char[] loadMSWord(InputStream inputStream) throws IOException {
    POIFSFileSystem fs = new POIFSFileSystem(inputStream);
    HWPFDocument doc = new HWPFDocument(fs);
    WordExtractor we = new WordExtractor(doc);
    char[] origText = we.getText().toCharArray();

    return origText;
}

From source file:com.pdf.GetPdf.java

/**
 * Downloads a Word document and appends its paragraphs to {@code document}.
 *
 * <p>For {@code "doc"} every paragraph returned by the word extractor is added after its
 * line terminator has been stripped. For any other type the source is read as .docx and
 * every paragraph body element is added exactly once, in document order. Tables in a .docx
 * source are not converted.
 *
 * @param document target document that receives one paragraph per source paragraph
 * @param url location of the Word file
 * @param type {@code "doc"} for the binary format; any other value is read as .docx
 * @throws IOException if the source cannot be opened or parsed
 * @throws DocumentException if a paragraph cannot be added to {@code document}
 */
public static void docConvert(Document document, String url, String type)
        throws IOException, DocumentException {
    boolean binaryDoc = type.equals("doc");

    java.io.InputStream source = new URL(url).openStream();
    try {
        if (binaryDoc) {
            WordExtractor we = new WordExtractor(new HWPFDocument(source));
            for (String text : we.getParagraphText()) {
                document.add(new Paragraph(text.replaceAll("\\cM?\r?\n", "")));
            }
        } else {
            XWPFDocument wordDoc = new XWPFDocument(source);
            for (IBodyElement content : wordDoc.getBodyElements()) {
                if (content.getElementType() == BodyElementType.PARAGRAPH) {
                    // Add this element only. Going through content.getBody().getParagraphs()
                    // would re-add every paragraph of the body once per paragraph element.
                    document.add(new Paragraph(((XWPFParagraph) content).getParagraphText()));
                }
                // TABLE elements are skipped: the previous code iterated their rows and
                // cells without emitting anything.
            }
        }
    } finally {
        // Release the network stream on success and on failure.
        source.close();
    }
}

From source file:com.thuvienkhoahoc.wordtomwtext.examples.WordToMwtext.java

License:Apache License

/**
 * Runs {@code WordToMwtext} on a Word file, passing it the output stream to write to.
 *
 * <p>Any failure is reported with {@code printStackTrace()}; both streams are closed in
 * every case.
 *
 * @param args {@code args[0]} is the path of the Word file to read; the optional
 *        {@code args[1]} is the output path, which defaults to the previously hard-coded
 *        {@code test.wikitext} in the root of drive C
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.println("Usage: WordToMwtext <input.doc> [output file]");
        return;
    }
    String outputPath = args.length > 1 ? args[1] : "c:\\test.wikitext";

    try {
        // Open the input first so that a missing input does not create an empty output.
        FileInputStream in = new FileInputStream(args[0]);
        try {
            OutputStream out = new FileOutputStream(outputPath);
            try {
                new WordToMwtext(new HWPFDocument(in), out);
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    } catch (Throwable t) {
        t.printStackTrace();
    }

}

From source file:com.unsa.view.MainView.java

License:Creative Commons License

private void btnProcesarActionPerformed(java.awt.event.ActionEvent evt) throws SQLException, IOException {//GEN-FIRST:event_btnProcesarActionPerformed
    // Handles the "Procesar" button: validates the form, analyses every selected Word file,
    // collects one Metadata entry per file, builds the ExcelController and lists the
    // results in the output table.

    // NOTE(review): the original condition tested lblInstitucion twice. The duplicate was
    // probably meant to check another label; confirm which field is missing here.
    if (lblInstitucion.getText().equals("") || lblIdioma.getText().equals("")) {
        JOptionPane.showMessageDialog(null, "Una de las opciones generales est vacio");
        return;
    }
    if (jTextField1.getText().equals("")) {
        JOptionPane.showMessageDialog(null, "No se especific la ruta donde guardar la metadata");
        return;
    }

    jProgressBar1.setValue(0);
    jProgressBar1.setStringPainted(true);

    File[] listOfFiles = file.getSelectedFiles();
    int count = 0;

    for (File selected : listOfFiles) {
        if (!selected.isFile()) {
            continue;
        }
        System.out.println(selected.getName());

        // A file that cannot be parsed in either Word format is recorded as failed.
        boolean archivoDaniado = !loadWordFile(selected);

        Metadata metadata = archivoDaniado ? loadMetadataFail() : loadMetadata(alg);
        metadata.setFileName(selected.getName());
        listMetaData.add(metadata);

        jProgressBar1.setValue((count + 1) * 100 / listOfFiles.length);
        count++;
    }

    String name = jTextField1.getText();

    // The instance is not used afterwards; presumably the constructor performs the export.
    // TODO confirm.
    ExcelController excel = new ExcelController(name, "UNSA", listMetaData);

    String[] lnames = { "Nombre Archivo", "Obs. Dudosa", "Obs. Critica", "Abrir Archivo" };
    DefaultTableModel model = new DefaultTableModel(lnames, 0);
    tableSalida.setModel(model);

    int contador = 0;
    for (Metadata meta : listMetaData) {
        Object[] data = new Object[4];
        // NOTE(review): listMetaData is a field that this method only appends to, and it
        // gets entries only for selections where isFile() is true, so position contador
        // in listMetaData does not necessarily match listOfFiles[contador]. Confirm.
        data[0] = listOfFiles[contador].getName();

        if (meta.getFailGeneral()) {
            data[1] = "Fail";
            data[2] = "Fail";
        } else {
            data[1] = meta.getStadistic().getObservationGeneral() ? "Observacion" : "";
            data[2] = meta.getObservacionGeneral() ? "Falta" : "";
        }
        data[3] = "abrir";

        model.addRow(data);
        contador++;
    }

    btnAbrirMetadata.setEnabled(true);

}

// Parses a Word file into the alg field: a name ending in "x" is read as .docx; anything
// else is tried as .doc first and then as .docx. Returns false when no attempt succeeds,
// in which case alg keeps its previous value.
private boolean loadWordFile(File source) {
    if (!source.getName().endsWith("x")) { //is not a docx
        try {
            alg = parseDoc(source);
            return true;
        } catch (Exception e) {
            // Fall through: the file may be a .docx carrying a different extension.
        }
    }
    try {
        alg = parseDocx(source);
        return true;
    } catch (Exception ex) {
        return false;
    }
}

// Reads a .docx file and closes the underlying stream in every case.
private AlgorithmsWord parseDocx(File source) throws Exception {
    FileInputStream in = new FileInputStream(source);
    try {
        return new AlgorithmsWord(new XWPFDocument(in).getParagraphs());
    } finally {
        in.close();
    }
}

// Reads a binary .doc file and closes the underlying stream in every case.
private AlgorithmsWord parseDoc(File source) throws Exception {
    FileInputStream in = new FileInputStream(source);
    try {
        return new AlgorithmsWord(new HWPFDocument(in).getRange());
    } finally {
        in.close();
    }
}