List of usage examples for org.apache.poi.hwpf HWPFDocument HWPFDocument
public HWPFDocument(DirectoryNode directory) throws IOException
From source file:com.unsa.view.MainView.java
License:Creative Commons License
/**
 * Converts a binary Word (.doc) file into a PDF file written next to it.
 *
 * <p>The output path is the input's absolute path with its last three characters
 * replaced by {@code pdf}. Every paragraph extracted from the Word document becomes
 * one PDF paragraph once its trailing line terminator has been stripped. Any failure
 * is printed to standard error and otherwise ignored.
 *
 * @param file1 the Word document to convert
 */
private void DocConverterPDF(File file1) {
    try {
        String inputPath = file1.getAbsolutePath();
        System.out.println(inputPath);

        // Read all paragraph text first so the input file is released before writing.
        String[] paragraphs;
        try (FileInputStream in = new FileInputStream(inputPath);
                NPOIFSFileSystem fs = new NPOIFSFileSystem(in)) {
            HWPFDocument doc = new HWPFDocument(fs.getRoot());
            paragraphs = new WordExtractor(doc).getParagraphText();
        }

        // Drops the last three characters of the path (assumed to be the "doc" extension).
        String outputBase = inputPath.substring(0, inputPath.length() - 3);

        com.lowagie.text.Document document = new com.lowagie.text.Document();
        try (OutputStream fileout = new FileOutputStream(new File(outputBase + "pdf"))) {
            try {
                PdfWriter writer = PdfWriter.getInstance(document, fileout);
                document.open();
                writer.setPageEmpty(true);
                document.newPage();
                writer.setPageEmpty(true);
                for (String paragraph : paragraphs) {
                    // Strip the optional CR / LF terminator that ends each extracted paragraph.
                    document.add(new Paragraph(paragraph.replaceAll("\\cM?\r?\n", "")));
                }
            } finally {
                // The document must be closed while the output stream is still open,
                // otherwise the end of the PDF cannot be written.
                document.close();
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.virtusa.isq.vtaf.runtime.SeleniumTestBase.java
License:Apache License
/** * Read doc file./*w w w .j a v a 2s. c o m*/ * * @param fileName * the file name * @return the string */ public final String readDocFile(final String fileName) { String docContent = null; try { File file = new File(fileName); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); HWPFDocument doc = new HWPFDocument(fis); WordExtractor we = new WordExtractor(doc); docContent = we.getText(); System.out.println("MS Word(.doc) Document Red, Content:" + docContent); fis.close(); } catch (IOException e) { e.printStackTrace(); reportresult(true, "CHECK DOCUMENT :", "FAILED", "CheckDocument command NODECOUNT : Execption occured. Actual error : " + e.getMessage()); checkTrue(false, false, "CheckDocument command NODECOUNT : Execption occured. Actual error : " + e.getMessage()); } return docContent; }
From source file:com.weibo.datasys.parser.office.extractor.WordParse.java
License:Open Source License
public FileData extractorDoc(File filePath) { FileData fData = new FileData(); fData.setName(filePath.getName());//from ww w.j ava2s .c om HWPFDocument doc = null; try { doc = new HWPFDocument(new FileInputStream(filePath)); fData.setContent(doc.getRange().text()); } catch (Exception e) { LOG.error("", e); } return fData; }
From source file:com.xpn.xwiki.plugin.lucene.textextraction.MSWordTextExtractor.java
License:Apache License
/**
 * Returns the text of a Word document supplied as raw bytes.
 *
 * @param data the bytes of the Word document
 * @return the text of the document's overall range
 * @throws Exception if the document cannot be parsed
 */
public String getText(byte[] data) throws Exception {
    final ByteArrayInputStream source = new ByteArrayInputStream(data);
    final HWPFDocument parsed = new HWPFDocument(source);
    return parsed.getRange().text();
}
From source file:com.xx.platform.util.tools.ms.WordExtractor.java
License:Apache License
/**
 * Creates a new Word extractor from a POIFS file system.
 *
 * <p>Builds an {@code HWPFDocument} from the given file system and delegates to
 * the constructor that accepts that document.
 *
 * @param fs POIFSFileSystem containing the word file
 * @throws IOException declared by this constructor; presumably raised when the
 *         document cannot be read from {@code fs}
 */
public WordExtractor(POIFSFileSystem fs) throws IOException {
    this(new HWPFDocument(fs));
}
From source file:com.xx.platform.util.tools.ms.WordExtractor.java
License:Apache License
/**
 * Loads the Word document held in the given file system into this extractor
 * and returns the result of {@code getText()}.
 *
 * @param poifs the file system containing the Word document
 * @return the value returned by {@code getText()} after the document is replaced
 * @throws Exception if the document cannot be built or its text cannot be read
 */
public String extractText(POIFSFileSystem poifs) throws Exception {
    final HWPFDocument loaded = new HWPFDocument(poifs);
    this.doc = loaded;
    final String extracted = getText();
    return extracted;
}
From source file:com.zhch.example.poi.Word2Forrest.java
License:Apache License
public static void main(String[] args) throws IOException { InputStream is = new FileInputStream("d:\\temp\\d3\\luyang.doc"); OutputStream out = new FileOutputStream("d:\\temp\\d3\\test.xml"); try {//from ww w. j a v a2 s. c o m new Word2Forrest(new HWPFDocument(is), out); } finally { out.close(); is.close(); } System.out.println("over!!"); }
From source file:cv_extractor.DocReader.java
protected static void readDocFile(File localFile) { try {/* www. j av a 2 s. co m*/ //Create a input stream to read file FileInputStream fis = new FileInputStream(localFile.getAbsolutePath()); //For reading docx files HWPFDocument doc = new HWPFDocument(fis); WordExtractor we = new WordExtractor(doc); String[] paragraphs = we.getParagraphText(); System.out.println("Total no of paragraph " + paragraphs.length); for (String para : paragraphs) { //Compile the regex defined above Pattern r = Pattern.compile(pattern); //Check if any string matches the compiled pattern Matcher m = r.matcher(para); if (m.find()) { //m.group() Returns the input subsequence matched by the previous match data.add(m.group()); } } fis.close(); } catch (Exception e) { e.printStackTrace(); } }
From source file:de.uni_siegen.wineme.come_in.thumbnailer.util.mime.DocFileIdentifier.java
License:Open Source License
/**
 * Refines an office MIME type to the Word (.doc) MIME type when the file parses as one.
 *
 * <p>The check runs only when {@code isOfficeFile(mimeType)} is true and the type
 * is not already {@code DOC_MIME_TYPE}. The file counts as a Word document when
 * it can be opened as an {@code HWPFDocument} whose range has a positive end offset.
 *
 * @param mimeType the MIME type detected so far
 * @param bytes unused by this implementation
 * @param file the file to probe
 * @return {@code DOC_MIME_TYPE} if the file parses as a non-empty Word document,
 *         otherwise {@code mimeType} unchanged
 */
@Override
public String identify(String mimeType, byte[] bytes, File file) {
    if (isOfficeFile(mimeType) && !DOC_MIME_TYPE.equals(mimeType)) {
        // try-with-resources releases the file handle after the probe.
        try (FileInputStream stream = new FileInputStream(file)) {
            HWPFDocument document = new HWPFDocument(stream);
            if (document.getRange().getEndOffset() > 0) {
                return DOC_MIME_TYPE;
            }
        } catch (Throwable ignored) {
            // Deliberate best-effort probe: any failure to parse means "not a
            // Word document", so fall through and keep the original MIME type.
        }
    }
    return mimeType;
}
From source file:edu.temple.CIS3238.readEasyUI.java
private static String[] readMyDocument(String fileName) { POIFSFileSystem fs = null;/*from w ww .ja v a 2s . co m*/ String text = null; String strArray[] = null; try { fs = new POIFSFileSystem(new FileInputStream(fileName)); HWPFDocument doc = new HWPFDocument(fs); /** Read the content **/ text = readParagraphs(doc); int pageNumber = 1; /** We will try reading the header for page 1**/ //readHeader(doc, pageNumber); /** Let's try reading the footer for page 1**/ //readFooter(doc, pageNumber); /** Read the document summary**/ //readDocumentSummary(doc); strArray = text.split(" "); } catch (Exception e) { e.printStackTrace(); } return strArray; }