List of usage examples for the HWPFDocument constructor of org.apache.poi.hwpf.HWPFDocument
public HWPFDocument(DirectoryNode directory) throws IOException
From source file:RefDiviedMain.java
License:Creative Commons License
/**
 * Reads the Word (.doc) file at {@code fileName} and sorts its paragraphs into the static
 * fields of {@code RefDiviedMain}.
 *
 * The file is opened as a {@code POIFSFileSystem} over a {@code FileInputStream} and wrapped in an
 * {@code HWPFDocument}; {@code Main.readParagraphs(doc, ta, refs)} is then called (presumably it
 * fills {@code refs} with the paragraph texts -- confirm against Main). The loop over {@code refs}:
 *  - trims every entry in place and skips entries shorter than three characters;
 *  - stores the first kept entry in {@code isOriginal} and the second in {@code title};
 *  - collects the lines between "[a]" and "[/a]" into {@code authors};
 *  - reads single-line metadata by prefix ("Manuscript accepted", "Short title", "doi:", "ppub:",
 *    "epub:", "volume:", "issue", "fpage", "lpage", "date accepted", "copyright-statement",
 *    "copyright-year"), checks most of them against a regex and calls {@code RefDiviedMain.error}
 *    on a mismatch;
 *  - collects the sections "Abstract", "[s1]Keywords", "[body]", "Acknowledgement", "[back]",
 *    "Table", "Figure legends" and "References" into {@code abstractArr}, {@code keywordArr},
 *    {@code discussion}, {@code acknowledgement}, {@code disclosure}, {@code table},
 *    {@code figure} and {@code references}, rewriting dx.doi.org and pubmed URL prefixes in
 *    references to the markers "aaaaadoi" and "aaaaapmid".
 *
 * Nothing is thrown to the caller: both catch blocks append the exception message to {@code ta}.
 *
 * NOTE(review): every {@code temp == null} test runs after {@code trim()} has already been called
 * on the same value, so it can never be true.
 * NOTE(review): the inner {@code while} loops advance {@code j} without checking it against
 * {@code refs.size()}; a missing closing tag ends in an exception that the inner catch reports.
 * NOTE(review): the "ppub:" pattern accepts two number groups (d/d) while its error text asks for
 * dd/dd/dddd.
 * NOTE(review): neither the FileInputStream nor {@code fs} is closed in this method.
 * NOTE(review): this listing is collapsed onto a few physical lines, so the "//from ..." watermark
 * comments out the remainder of its line; the text must be re-flowed before it can compile.
 *
 * @param fileName path of the .doc file to read
 */
public static void readMyDocument(String fileName) { POIFSFileSystem fs = null;//from www . ja v a 2s .c om try { fs = new POIFSFileSystem(new FileInputStream(fileName)); HWPFDocument doc = new HWPFDocument(fs); /** Read the content **/ Main.readParagraphs(doc, RefDiviedMain.ta, RefDiviedMain.refs); int realIndex = 0; try { for (int i = 0; i < refs.size(); i++) { String temp = refs.get(i).trim(); System.out.println(i + ":" + temp); refs.set(i, temp); if (temp == null || temp.length() < 3) { continue; } realIndex++; ta.append("\n" + "dealing with the " + i + " ref:" + refs.get(i)); if (realIndex == 1) { RefDiviedMain.isOriginal = temp; continue; } if (realIndex == 2) { title = (temp == null ? "Unknown Title" : temp); continue; } if (temp.startsWith("[a]")) { if (realIndex < 3) { RefDiviedMain.error("author tag is located at wrong place."); } if (temp.equals("[a]")) { int j = i; while (!refs.get(j).equals("[/a]")) { temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } authors.add(refs.get(j)); j++; } i = j; continue; } else { RefDiviedMain.error("[a] must not be followed by anything.\n" + temp); } } Pattern p = Pattern.compile("^\\s{0,}\\d+/\\d+/\\d+\\s{0,}$"); if (temp.startsWith("Manuscript accepted")) { manuscriptDateAccepted = temp.replace("Manuscript accepted:", ""); Matcher m = p.matcher(manuscriptDateAccepted); if (m.find()) { continue; } else { RefDiviedMain.error( "manuscriptDateAccepted must follow by data format:dd/dd/dddd.\n" + temp); } } if (temp.startsWith("Short title")) { shortTitle = temp.replace("Short title:", ""); continue; } if (temp.startsWith("doi:")) { doi = temp.replace("doi:", ""); continue; } if (temp.startsWith("ppub:")) { ppub = temp.replace("ppub:", ""); Pattern ppub_p = Pattern.compile("^\\s{0,}\\d+/\\d+\\s{0,}$"); Matcher m = ppub_p.matcher(ppub); if (m.find()) { continue; } else { RefDiviedMain.error("ppub data format must be:dd/dd/dddd.\n" + temp); } } if (temp.startsWith("epub:")) { 
epub = temp.replace("epub:", ""); Matcher m = p.matcher(epub); if (m.find()) { continue; } else { RefDiviedMain.error("epub data format must be:dd/dd/dddd.\n" + temp); } } if (temp.startsWith("volume:")) { volume = temp.replace("volume:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$"); Matcher m = pc.matcher(volume); if (m.find()) { continue; } else { RefDiviedMain.error("must only be digit number:\n" + temp); } } if (temp.startsWith("issue")) { issue = temp.replace("issue:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$"); Matcher m = pc.matcher(issue); if (m.find()) { continue; } else { RefDiviedMain.error("must only be digit number:\n" + temp); } } if (temp.startsWith("fpage")) { fpage = temp.replace("fpage:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$"); Matcher m = pc.matcher(fpage); if (m.find()) { continue; } else { RefDiviedMain.error("must only be digit number:\n" + temp); } } if (temp.startsWith("lpage")) { lpage = temp.replace("lpage:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$"); Matcher m = pc.matcher(lpage); if (m.find()) { continue; } else { RefDiviedMain.error("must only be digit number:\n" + temp); } } if (temp.startsWith("date accepted")) { dateAccepted = temp.replace("date accepted:", ""); Matcher m = p.matcher(dateAccepted); if (m.find()) { continue; } else { RefDiviedMain.error("data format must be:dd/dd/dddd.\n" + temp); } } if (temp.startsWith("copyright-statement")) { copyrightStat = temp.replace("copyright-statement:", ""); continue; } if (temp.startsWith("copyright-year")) { copyrightYear = temp.replace("copyright-year:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d{4}\\s{0,}$"); Matcher m = pc.matcher(copyrightYear); if (m.find()) { continue; } else { RefDiviedMain.error("must only be 4 digit numbers:\n" + temp); } } if (temp.equalsIgnoreCase("Abstract")) { int j = i; while (!refs.get(j).trim().equals("[s1]Keywords")) { ta.append("try find out introduction " + refs.get(j) + "\n"); temp = 
refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } abstractArr.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.trim().equalsIgnoreCase("[s1]Keywords")) { int j = i; while (!refs.get(j).trim().equals("[body]")) { ta.append("try find out introduction " + refs.get(j) + "\n"); temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } keywordArr.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.equals("[body]")) { RefDiviedMain.isValidBoday = true; int j = i; while (!refs.get(j).equals("Acknowledgement") && !refs.get(j).equals("[back]")) { ta.append("try find out introduction " + refs.get(j) + "\n"); temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } RefDiviedMain.discussion.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.equals("Acknowledgement")) { int j = i + 1; while (!refs.get(j).equals("[back]")) { temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } RefDiviedMain.acknowledgement.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.equals("[back]")) { RefDiviedMain.isValidBack = true; int j = i; while (!refs.get(j).equals("Table") && !refs.get(j).equals("Figure legends") && !refs.get(j).equals("References")) { temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } RefDiviedMain.disclosure.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.equalsIgnoreCase("Table")) { int j = i + 1; while (!refs.get(j).startsWith("Figure legends") && !refs.get(j).equals("References")) { List<String> aTable = new ArrayList<String>(); refs.set(j, refs.get(j).trim()); String tempTemp = refs.get(j).trim(); System.out.println("setup table:" + tempTemp); if (refs.get(j) == null || tempTemp.length() < 3) { j++; continue; } if (!tempTemp.contains("[title]")) { RefDiviedMain.error("problem happened around " + 
tempTemp); //System.exit(-1); } String[] a = refs.get(j).split("\\[title\\]"); aTable.add(a[0]); String[] b = new String[2]; if (a[1].contains("footnotes")) { b = a[1].split("\\[footnotes\\]"); j++; while (!refs.get(j).startsWith("Figure legends") && !refs.get(j).equals("References") && !refs.get(j).startsWith("Table")) { b[1] += "aaaaa" + refs.get(j); j++; } j--; } else { b[0] = a[1]; b[1] = ""; } aTable.add(b[0]); aTable.add(b[1]); RefDiviedMain.table.add(aTable); j++; } i = j - 1; continue; } if (temp.equals("Figure legends")) { int j = i + 1; while (!refs.get(j).startsWith("References")) { List<String> aTable = new ArrayList<String>(); refs.set(j, refs.get(j).trim()); String tempTemp = refs.get(j).trim(); if (tempTemp == null || tempTemp.length() < 3) { j++; continue; } if (!tempTemp.contains("[legend]")) { RefDiviedMain.error("problem happened around " + tempTemp); // System.exit(-1); } String[] a = refs.get(j).split("\\[legend\\]"); aTable.add(a[0]); String[] b = a[1].split("\\[file\\]"); aTable.add(b[0]); aTable.add(b[1]); RefDiviedMain.figure.add(aTable); j++; } i = j - 1; continue; } if (temp.equals("References")) { RefDiviedMain.isValidRefs = true; i++; while (i < refs.size()) { System.out.println("adding reference before:" + refs.get(i)); String result = refs.get(i); result += "httphttp"; String secondString = null; if (i + 1 < refs.size()) { secondString = refs.get(i + 1); } System.out.println("new string 1:" + secondString); String thirdString = null; if (i + 2 < refs.size()) { thirdString = refs.get(i + 2); } System.out.println("new string 2:" + thirdString); if (secondString != null && (secondString.trim().startsWith("http://") || secondString.trim().startsWith("Http://"))) { result += secondString.trim(); System.out.println("adding second string:" + result); i++; if (thirdString != null && (thirdString.trim().startsWith("http://") || thirdString.trim().startsWith("Http://"))) { result += thirdString.trim(); System.out.println("adding third string:" 
+ result); i++; } } result = result.replaceAll("http://dx.doi.org/", "aaaaadoi"); result = result.replaceAll("Http://dx.doi.org/", "aaaaadoi"); result = result.replaceAll("http://www.ncbi.nlm.nih.gov/pubmed/", "aaaaapmid"); result = result.replaceAll("Http://www.ncbi.nlm.nih.gov/pubmed/", "aaaaapmid"); System.out.println("adding reference after:" + result); RefDiviedMain.references.add(result); i++; } } //doit(refs.get(i), i); } } catch (Exception e) { // TODO Auto-generated catch block ta.append("\nerrors happen:\n"); ta.append(e.getMessage() + "\n"); } int pageNumber = 1; /** We will try reading the header for page 1**/ //readHeader(doc, pageNumber); /** Let's try reading the footer for page 1**/ //readFooter(doc, pageNumber); /** Read the document summary**/ //readDocumentSummary(doc); } catch (Exception e) { ta.append(e.getMessage()); } }
From source file:NewEmptyJUnitTest.java
/** * Tests that we can work with both {@link POIFSFileSystem} * and {@link NPOIFSFileSystem}//from w w w .jav a 2s . com */ public void testDifferentPOIFS() throws Exception { POIDataSamples docTests = POIDataSamples.getDocumentInstance(); // Open the two filesystems DirectoryNode[] files = new DirectoryNode[2]; files[0] = (new POIFSFileSystem(docTests.openResourceAsStream("test2.doc"))).getRoot(); NPOIFSFileSystem npoifsFileSystem = new NPOIFSFileSystem(docTests.getFile("test2.doc")); files[1] = npoifsFileSystem.getRoot(); // Open directly for (DirectoryNode dir : files) { WordExtractor extractor = new WordExtractor(dir); assertEquals(p_text1_block, extractor.getText()); } // Open via a HWPFDocument for (DirectoryNode dir : files) { HWPFDocument doc = new HWPFDocument(dir); WordExtractor extractor = new WordExtractor(doc); assertEquals(p_text1_block, extractor.getText()); } npoifsFileSystem.close(); }
From source file:RefSouceOnlyMain.java
License:Creative Commons License
/**
 * Reads the Word (.doc) file at {@code fileName} and sorts its paragraphs into the static
 * fields of {@code RefSouceOnlyMain}.
 *
 * The file is opened as a {@code POIFSFileSystem} over a {@code FileInputStream} and wrapped in an
 * {@code HWPFDocument}; {@code Main.readParagraphs(doc, ta, refs)} is then called (presumably it
 * fills {@code refs} with the paragraph texts -- confirm against Main). The loop over {@code refs}:
 *  - trims every entry in place and skips entries shorter than three characters;
 *  - stores the first kept entry in {@code isOriginal} and the second in {@code title};
 *  - collects the lines between "[a]" and "[/a]" into {@code authors};
 *  - reads single-line metadata by prefix ("Manuscript accepted", "Short title", "doi:", "ppub:",
 *    "epub:", "volume:", "issue", "fpage", "lpage", "date accepted", "copyright-statement",
 *    "copyright-year"), checks most of them against a regex and calls
 *    {@code RefSouceOnlyMain.error} on a mismatch;
 *  - collects the sections "Abstract", "[s1]Keywords", "[body]", "Acknowledgement", "[back]",
 *    "Table", "Figure legends" and "References" into {@code abstractArr}, {@code keywordArr},
 *    {@code discussion}, {@code acknowledgement}, {@code disclosure}, {@code table},
 *    {@code figure} and {@code references}, rewriting dx.doi.org and pubmed URL prefixes in
 *    references to the markers "aaaaadoi" and "aaaaapmid".
 *
 * Nothing is thrown to the caller: both catch blocks append the exception message to {@code ta}.
 *
 * NOTE(review): every {@code temp == null} test runs after {@code trim()} has already been called
 * on the same value, so it can never be true.
 * NOTE(review): the inner {@code while} loops advance {@code j} without checking it against
 * {@code refs.size()}; a missing closing tag ends in an exception that the inner catch reports.
 * NOTE(review): the "ppub:" pattern accepts two number groups (d/d) while its error text asks for
 * dd/dd/dddd.
 * NOTE(review): neither the FileInputStream nor {@code fs} is closed in this method.
 * NOTE(review): this listing is collapsed onto a few physical lines, so the "//from ..." watermark
 * comments out the remainder of its line; the text must be re-flowed before it can compile.
 *
 * @param fileName path of the .doc file to read
 */
public static void readMyDocument(String fileName) { POIFSFileSystem fs = null;//from w ww. j ava2 s .c o m try { fs = new POIFSFileSystem(new FileInputStream(fileName)); HWPFDocument doc = new HWPFDocument(fs); /** Read the content **/ Main.readParagraphs(doc, RefSouceOnlyMain.ta, RefSouceOnlyMain.refs); int realIndex = 0; try { for (int i = 0; i < refs.size(); i++) { String temp = refs.get(i).trim(); System.out.println(i + ":" + temp); refs.set(i, temp); if (temp == null || temp.length() < 3) { continue; } realIndex++; ta.append("\n" + "dealing with the " + i + " ref:" + refs.get(i)); if (realIndex == 1) { RefSouceOnlyMain.isOriginal = temp; continue; } if (realIndex == 2) { title = (temp == null ? "Unknown Title" : temp); continue; } if (temp.startsWith("[a]")) { if (realIndex < 3) { RefSouceOnlyMain.error("author tag is located at wrong place."); } if (temp.equals("[a]")) { int j = i; while (!refs.get(j).equals("[/a]")) { temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } authors.add(refs.get(j)); j++; } i = j; continue; } else { RefSouceOnlyMain.error("[a] must not be followed by anything.\n" + temp); } } Pattern p = Pattern.compile("^\\s{0,}\\d+/\\d+/\\d+\\s{0,}$"); if (temp.startsWith("Manuscript accepted")) { manuscriptDateAccepted = temp.replace("Manuscript accepted:", ""); Matcher m = p.matcher(manuscriptDateAccepted); if (m.find()) { continue; } else { RefSouceOnlyMain.error( "manuscriptDateAccepted must follow by data format:dd/dd/dddd.\n" + temp); } } if (temp.startsWith("Short title")) { shortTitle = temp.replace("Short title:", ""); continue; } if (temp.startsWith("doi:")) { doi = temp.replace("doi:", ""); continue; } if (temp.startsWith("ppub:")) { ppub = temp.replace("ppub:", ""); Pattern ppub_p = Pattern.compile("^\\s{0,}\\d+/\\d+\\s{0,}$"); Matcher m = ppub_p.matcher(ppub); if (m.find()) { continue; } else { RefSouceOnlyMain.error("ppub data format must be:dd/dd/dddd.\n" + temp); } } if 
(temp.startsWith("epub:")) { epub = temp.replace("epub:", ""); Matcher m = p.matcher(epub); if (m.find()) { continue; } else { RefSouceOnlyMain.error("epub data format must be:dd/dd/dddd.\n" + temp); } } if (temp.startsWith("volume:")) { volume = temp.replace("volume:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$"); Matcher m = pc.matcher(volume); if (m.find()) { continue; } else { RefSouceOnlyMain.error("must only be digit number:\n" + temp); } } if (temp.startsWith("issue")) { issue = temp.replace("issue:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$"); Matcher m = pc.matcher(issue); if (m.find()) { continue; } else { RefSouceOnlyMain.error("must only be digit number:\n" + temp); } } if (temp.startsWith("fpage")) { fpage = temp.replace("fpage:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$"); Matcher m = pc.matcher(fpage); if (m.find()) { continue; } else { RefSouceOnlyMain.error("must only be digit number:\n" + temp); } } if (temp.startsWith("lpage")) { lpage = temp.replace("lpage:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d+\\s{0,}$"); Matcher m = pc.matcher(lpage); if (m.find()) { continue; } else { RefSouceOnlyMain.error("must only be digit number:\n" + temp); } } if (temp.startsWith("date accepted")) { dateAccepted = temp.replace("date accepted:", ""); Matcher m = p.matcher(dateAccepted); if (m.find()) { continue; } else { RefSouceOnlyMain.error("data format must be:dd/dd/dddd.\n" + temp); } } if (temp.startsWith("copyright-statement")) { copyrightStat = temp.replace("copyright-statement:", ""); continue; } if (temp.startsWith("copyright-year")) { copyrightYear = temp.replace("copyright-year:", ""); Pattern pc = Pattern.compile("^\\s{0,}\\d{4}\\s{0,}$"); Matcher m = pc.matcher(copyrightYear); if (m.find()) { continue; } else { RefSouceOnlyMain.error("must only be 4 digit numbers:\n" + temp); } } if (temp.equalsIgnoreCase("Abstract")) { int j = i; while (!refs.get(j).trim().equals("[s1]Keywords")) { ta.append("try find 
out introduction " + refs.get(j) + "\n"); temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } abstractArr.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.trim().equalsIgnoreCase("[s1]Keywords")) { int j = i; while (!refs.get(j).trim().equals("[body]")) { ta.append("try find out introduction " + refs.get(j) + "\n"); temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } keywordArr.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.equals("[body]")) { RefSouceOnlyMain.isValidBoday = true; int j = i; while (!refs.get(j).equals("Acknowledgement") && !refs.get(j).equals("[back]")) { ta.append("try find out introduction " + refs.get(j) + "\n"); temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } RefSouceOnlyMain.discussion.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.equals("Acknowledgement")) { int j = i + 1; while (!refs.get(j).equals("[back]")) { temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } RefSouceOnlyMain.acknowledgement.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.equals("[back]")) { RefSouceOnlyMain.isValidBack = true; int j = i; while (!refs.get(j).equals("Table") && !refs.get(j).equals("Figure legends") && !refs.get(j).equals("References")) { temp = refs.get(j).trim(); refs.set(j, temp); if (temp == null || temp.length() < 3) { j++; continue; } RefSouceOnlyMain.disclosure.add(refs.get(j)); j++; } i = j - 1; continue; } if (temp.equalsIgnoreCase("Table")) { int j = i + 1; while (!refs.get(j).startsWith("Figure legends") && !refs.get(j).equals("References")) { List<String> aTable = new ArrayList<String>(); refs.set(j, refs.get(j).trim()); String tempTemp = refs.get(j).trim(); System.out.println("setup table:" + tempTemp); if (refs.get(j) == null || tempTemp.length() < 3) { j++; continue; } if 
(!tempTemp.contains("[title]")) { RefSouceOnlyMain.error("problem happened around " + tempTemp); //System.exit(-1); } String[] a = refs.get(j).split("\\[title\\]"); aTable.add(a[0]); String[] b = new String[2]; if (a[1].contains("footnotes")) { b = a[1].split("\\[footnotes\\]"); j++; while (!refs.get(j).startsWith("Figure legends") && !refs.get(j).equals("References") && !refs.get(j).startsWith("Table")) { b[1] += "aaaaa" + refs.get(j); j++; } j--; } else { b[0] = a[1]; b[1] = ""; } aTable.add(b[0]); aTable.add(b[1]); RefSouceOnlyMain.table.add(aTable); j++; } i = j - 1; continue; } if (temp.equals("Figure legends")) { int j = i + 1; while (!refs.get(j).startsWith("References")) { List<String> aTable = new ArrayList<String>(); refs.set(j, refs.get(j).trim()); String tempTemp = refs.get(j).trim(); if (tempTemp == null || tempTemp.length() < 3) { j++; continue; } if (!tempTemp.contains("[legend]")) { RefSouceOnlyMain.error("problem happened around " + tempTemp); // System.exit(-1); } String[] a = refs.get(j).split("\\[legend\\]"); aTable.add(a[0]); String[] b = a[1].split("\\[file\\]"); aTable.add(b[0]); aTable.add(b[1]); RefSouceOnlyMain.figure.add(aTable); j++; } i = j - 1; continue; } if (temp.equals("References")) { RefSouceOnlyMain.isValidRefs = true; i++; while (i < refs.size()) { System.out.println("adding reference:" + refs.get(i)); String result = refs.get(i); result += "httphttp"; String secondString = null; if (i + 1 < refs.size()) { secondString = refs.get(i + 1); } System.out.println("new string 1:" + secondString); String thirdString = null; if (i + 2 < refs.size()) { thirdString = refs.get(i + 2); } if (secondString != null && (secondString.trim().startsWith("http://") || secondString.trim().startsWith("Http://"))) { result += secondString.trim(); i++; if (thirdString != null && (thirdString.trim().startsWith("http://") || thirdString.trim().startsWith("Http://"))) { result += thirdString.trim(); i++; } } result = 
result.replaceAll("http://dx.doi.org/", "aaaaadoi"); result = result.replaceAll("Http://dx.doi.org/", "aaaaadoi"); result = result.replaceAll("http://www.ncbi.nlm.nih.gov/pubmed/", "aaaaapmid"); result = result.replaceAll("Http://www.ncbi.nlm.nih.gov/pubmed/", "aaaaapmid"); RefSouceOnlyMain.references.add(result); i++; } } //doit(refs.get(i), i); } } catch (Exception e) { // TODO Auto-generated catch block ta.append("\nerrors happen:\n"); ta.append(e.getMessage() + "\n"); } int pageNumber = 1; /** We will try reading the header for page 1**/ //readHeader(doc, pageNumber); /** Let's try reading the footer for page 1**/ //readFooter(doc, pageNumber); /** Read the document summary**/ //readDocumentSummary(doc); } catch (Exception e) { ta.append(e.getMessage()); } }
From source file:at.tugraz.sss.serv.SSFileU.java
License:Apache License
public static void writePDFFromDoc(final String docFilePath, final String pdfFilePath) throws Exception { final Document document = new Document(); final POIFSFileSystem fs = new POIFSFileSystem(openFileForRead(docFilePath)); final HWPFDocument word = new HWPFDocument(fs); final WordExtractor we = new WordExtractor(word); final OutputStream out = openOrCreateFileWithPathForWrite(pdfFilePath); final PdfWriter writer = PdfWriter.getInstance(document, out); final Range range = word.getRange(); document.open();/*from www. ja v a2 s .c o m*/ writer.setPageEmpty(true); document.newPage(); writer.setPageEmpty(true); String[] paragraphs = we.getParagraphText(); for (int i = 0; i < paragraphs.length; i++) { org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i); // CharacterRun run = pr.getCharacterRun(i); // run.setBold(true); // run.setCapitalized(true); // run.setItalic(true); paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", ""); System.out.println("Length:" + paragraphs[i].length()); System.out.println("Paragraph" + i + ": " + paragraphs[i].toString()); // add the paragraph to the document document.add(new Paragraph(paragraphs[i])); } document.close(); }
From source file:at.tugraz.sss.serv.util.SSFileU.java
License:Apache License
public static void writePDFFromDoc(final String docFilePath, final String pdfFilePath) throws SSErr { try {/*from w ww . j a v a 2 s . c om*/ final Document document = new Document(); final POIFSFileSystem fs = new POIFSFileSystem(openFileForRead(docFilePath)); final HWPFDocument word = new HWPFDocument(fs); final WordExtractor we = new WordExtractor(word); final OutputStream out = openOrCreateFileWithPathForWrite(pdfFilePath); final PdfWriter writer = PdfWriter.getInstance(document, out); final Range range = word.getRange(); document.open(); writer.setPageEmpty(true); document.newPage(); writer.setPageEmpty(true); String[] paragraphs = we.getParagraphText(); for (int i = 0; i < paragraphs.length; i++) { org.apache.poi.hwpf.usermodel.Paragraph pr = range.getParagraph(i); // CharacterRun run = pr.getCharacterRun(i); // run.setBold(true); // run.setCapitalized(true); // run.setItalic(true); paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n", ""); System.out.println("Length:" + paragraphs[i].length()); System.out.println("Paragraph" + i + ": " + paragraphs[i].toString()); // add the paragraph to the document document.add(new Paragraph(paragraphs[i])); } document.close(); } catch (Exception error) { SSServErrReg.regErrThrow(error); } }
From source file:b01.officeLink.ExtendedWordDocument.java
License:Apache License
public ExtendedWordDocument(InputStream istream) throws IOException { try {//from w w w . ja v a 2 s . c o m xwpfDocument = new XWPFDocument(istream); } catch (Exception e) { xwpfDocument = null; Globals.logString("Could not read EXCEL file as xlsx\n" + (e != null ? e.getMessage() : "")); try { hwpfDocument = new HWPFDocument(istream); } catch (Exception e1) { hwpfDocument = null; Globals.logException(e1); } } }
From source file:b01.officeLink.ExtendedWordDocument.java
License:Apache License
/**
 * Wraps a binary Word document that is already available as an OLE2 filesystem.
 *
 * @param pfilesystem filesystem containing the Word document
 * @throws IOException if {@link HWPFDocument} cannot be built from the filesystem
 */
public ExtendedWordDocument(POIFSFileSystem pfilesystem) throws IOException {
    this.hwpfDocument = new HWPFDocument(pfilesystem);
}
From source file:br.com.schumaker.beta.doc.ReadDocMaster.java
/**
 * Prints every paragraph of a Word (.doc) file whose text, after field codes are
 * stripped, is longer than ten characters.
 *
 * @param args optional; {@code args[0]}, when present, is the path of the document to
 *             read, otherwise the original sample path is used
 */
public static void main(String[] args) {
    final String path = (args != null && args.length > 0)
            ? args[0]
            : "/users/hudsonschumaker/downloads/Guisi01206us - Jira Guide for P3 PECB enhancement requests.doc";
    try {
        final FileInputStream fis = new FileInputStream(new File(path));
        try {
            HWPFDocument doc = new HWPFDocument(fis);
            WordExtractor extractor = new WordExtractor(doc);
            for (String rawText : extractor.getParagraphText()) {
                // stripFields is a static utility; call it through the class.
                String text = WordExtractor.stripFields(rawText);
                if (text.length() > 10) {
                    System.out.println(text.trim());
                }
            }
        } finally {
            fis.close();
        }
    } catch (Exception exep) {
        // Report the failure instead of silently printing nothing.
        System.err.println("Could not read " + path + ": " + exep);
    }
}
From source file:br.com.schumaker.beta.doc.ReadFile.java
public static void main(String[] args) { WordExtractor extractor = null;// w w w. j a v a2 s . c om try { File file = new File( "/users/hudsonschumaker/downloads/Guisi01206us - Jira Guide for P3 PECB enhancement requests.doc"); FileInputStream fis = new FileInputStream(file.getAbsolutePath()); HWPFDocument doc = new HWPFDocument(fis); String text = doc.getDocumentText(); System.out.println(text); } catch (Exception exep) { } }
From source file:com.anphat.customer.controller.ExportContractToDocController.java
private void getData() throws Exception { try {// ww w .j a v a2 s . c om documentDoc = new HWPFDocument(new FileInputStream(Constants.PATH_TEMPLATE + fileName)); lstTableDoc = documentDoc.getListTables(); } catch (Exception e) { throw new Exception("Khng ?c c file biu mu"); } }